GNU Linux-libre 6.9-gnu
fs/bcachefs/backpointers.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "error.h"
13
14 #include <linux/mm.h>
15
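/*
 * A backpointer records, for a device:bucket, which btree key (extent or
 * btree node pointer) owns the data in that bucket.  extent_matches_bp()
 * checks that @k has a non-cached pointer into @bucket whose generated
 * backpointer matches @bp exactly.
 */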
16 static bool extent_matches_bp(struct bch_fs *c,
17                               enum btree_id btree_id, unsigned level,
18                               struct bkey_s_c k,
19                               struct bpos bucket,
20                               struct bch_backpointer bp)
21 {
22         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23         const union bch_extent_entry *entry;
24         struct extent_ptr_decoded p;
25
26         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27                 struct bpos bucket2;
28                 struct bch_backpointer bp2;
29
30                 if (p.ptr.cached)
31                         continue;
32
33                 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
34                 if (bpos_eq(bucket, bucket2) &&
35                     !memcmp(&bp, &bp2, sizeof(bp)))
36                         return true;
37         }
38
39         return false;
40 }
41
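/*
 * Validity check for KEY_TYPE_backpointer keys: the key's position must be
 * consistent with the bucket_offset stored in the value.  Backpointers for
 * missing devices are left for fsck to repair.
 */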
42 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
43                              enum bkey_invalid_flags flags,
44                              struct printbuf *err)
45 {
46         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
47
48         /* these will be caught by fsck */
49         if (!bch2_dev_exists2(c, bp.k->p.inode))
50                 return 0;
51
52         struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
53         struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
54         int ret = 0;
55
56         bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
57                          !bpos_eq(bp.k->p, bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset)),
58                          c, err,
59                          backpointer_bucket_offset_wrong,
60                          "backpointer bucket_offset wrong");
61 fsck_err:
62         return ret;
63 }
64
65 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
66 {
67         prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
68                bch2_btree_id_str(bp->btree_id),
69                bp->level,
70                (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
71                (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
72                bp->bucket_len);
73         bch2_bpos_to_text(out, bp->pos);
74 }
75
76 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
77 {
78         if (bch2_dev_exists2(c, k.k->p.inode)) {
79                 prt_str(out, "bucket=");
80                 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
81                 prt_str(out, " ");
82         }
83
84         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
85 }
86
87 void bch2_backpointer_swab(struct bkey_s k)
88 {
89         struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
90
91         bp.v->bucket_offset     = swab40(bp.v->bucket_offset);
92         bp.v->bucket_len        = swab32(bp.v->bucket_len);
93         bch2_bpos_swab(&bp.v->pos);
94 }
95
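/*
 * Report a backpointer update that did not find what it expected: an existing
 * key when inserting, or a missing/mismatched key when deleting.  This is
 * only treated as a filesystem inconsistency once the
 * extents_to_backpointers recovery pass has completed.
 */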
96 static noinline int backpointer_mod_err(struct btree_trans *trans,
97                                         struct bch_backpointer bp,
98                                         struct bkey_s_c bp_k,
99                                         struct bkey_s_c orig_k,
100                                         bool insert)
101 {
102         struct bch_fs *c = trans->c;
103         struct printbuf buf = PRINTBUF;
104
105         if (insert) {
106                 prt_printf(&buf, "existing backpointer found when inserting ");
107                 bch2_backpointer_to_text(&buf, &bp);
108                 prt_newline(&buf);
109                 printbuf_indent_add(&buf, 2);
110
111                 prt_printf(&buf, "found ");
112                 bch2_bkey_val_to_text(&buf, c, bp_k);
113                 prt_newline(&buf);
114
115                 prt_printf(&buf, "for ");
116                 bch2_bkey_val_to_text(&buf, c, orig_k);
117
118                 bch_err(c, "%s", buf.buf);
119         } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
120                 prt_printf(&buf, "backpointer not found when deleting");
121                 prt_newline(&buf);
122                 printbuf_indent_add(&buf, 2);
123
124                 prt_printf(&buf, "searching for ");
125                 bch2_backpointer_to_text(&buf, &bp);
126                 prt_newline(&buf);
127
128                 prt_printf(&buf, "got ");
129                 bch2_bkey_val_to_text(&buf, c, bp_k);
130                 prt_newline(&buf);
131
132                 prt_printf(&buf, "for ");
133                 bch2_bkey_val_to_text(&buf, c, orig_k);
134
135                 bch_err(c, "%s", buf.buf);
136         }
137
138         printbuf_exit(&buf);
139
140         if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
141                 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
142         } else {
143                 return 0;
144         }
145 }
146
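/*
 * Update the backpointers btree directly instead of going through the btree
 * write buffer, which is the normal path for backpointer updates; see
 * bch2_backpointers_no_use_write_buffer.
 */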
147 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
148                                 struct bpos bucket,
149                                 struct bch_backpointer bp,
150                                 struct bkey_s_c orig_k,
151                                 bool insert)
152 {
153         struct btree_iter bp_iter;
154         struct bkey_s_c k;
155         struct bkey_i_backpointer *bp_k;
156         int ret;
157
158         bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
159         ret = PTR_ERR_OR_ZERO(bp_k);
160         if (ret)
161                 return ret;
162
163         bkey_backpointer_init(&bp_k->k_i);
164         bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
165         bp_k->v = bp;
166
167         if (!insert) {
168                 bp_k->k.type = KEY_TYPE_deleted;
169                 set_bkey_val_u64s(&bp_k->k, 0);
170         }
171
172         k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
173                                bp_k->k.p,
174                                BTREE_ITER_INTENT|
175                                BTREE_ITER_SLOTS|
176                                BTREE_ITER_WITH_UPDATES);
177         ret = bkey_err(k);
178         if (ret)
179                 goto err;
180
181         if (insert
182             ? k.k->type
183             : (k.k->type != KEY_TYPE_backpointer ||
184                memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
185                 ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
186                 if (ret)
187                         goto err;
188         }
189
190         ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
191 err:
192         bch2_trans_iter_exit(trans, &bp_iter);
193         return ret;
194 }
195
196 /*
197  * Find the next backpointer >= *bp_pos:
198  */
199 int bch2_get_next_backpointer(struct btree_trans *trans,
200                               struct bpos bucket, int gen,
201                               struct bpos *bp_pos,
202                               struct bch_backpointer *bp,
203                               unsigned iter_flags)
204 {
205         struct bch_fs *c = trans->c;
206         struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
207         struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
208         struct bkey_s_c k;
209         int ret = 0;
210
211         if (bpos_ge(*bp_pos, bp_end_pos))
212                 goto done;
213
214         if (gen >= 0) {
215                 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
216                                        bucket, BTREE_ITER_CACHED|iter_flags);
217                 ret = bkey_err(k);
218                 if (ret)
219                         goto out;
220
221                 if (k.k->type != KEY_TYPE_alloc_v4 ||
222                     bkey_s_c_to_alloc_v4(k).v->gen != gen)
223                         goto done;
224         }
225
226         *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
227
228         for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
229                                      *bp_pos, iter_flags, k, ret) {
230                 if (bpos_ge(k.k->p, bp_end_pos))
231                         break;
232
233                 *bp_pos = k.k->p;
234                 *bp = *bkey_s_c_to_backpointer(k).v;
235                 goto out;
236         }
237 done:
238         *bp_pos = SPOS_MAX;
239 out:
240         bch2_trans_iter_exit(trans, &bp_iter);
241         bch2_trans_iter_exit(trans, &alloc_iter);
242         return ret;
243 }
244
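/*
 * Report a backpointer whose target extent or btree node was not found.  With
 * the btree write buffer in use this can happen legitimately, because the
 * backpointer may already have been deleted; otherwise it indicates an
 * inconsistency.
 */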
245 static void backpointer_not_found(struct btree_trans *trans,
246                                   struct bpos bp_pos,
247                                   struct bch_backpointer bp,
248                                   struct bkey_s_c k)
249 {
250         struct bch_fs *c = trans->c;
251         struct printbuf buf = PRINTBUF;
252         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
253
254         /*
255          * If we're using the btree write buffer, the backpointer we were
256          * looking at may have already been deleted - failure to find what it
257          * pointed to is not an error:
258          */
259         if (likely(!bch2_backpointers_no_use_write_buffer))
260                 return;
261
262         prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
263                    bp.level ? "btree node" : "extent");
264         prt_printf(&buf, "bucket: ");
265         bch2_bpos_to_text(&buf, bucket);
266         prt_printf(&buf, "\n  ");
267
268         prt_printf(&buf, "backpointer pos: ");
269         bch2_bpos_to_text(&buf, bp_pos);
270         prt_printf(&buf, "\n  ");
271
272         bch2_backpointer_to_text(&buf, &bp);
273         prt_printf(&buf, "\n  ");
274         bch2_bkey_val_to_text(&buf, c, k);
275         if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
276                 bch_err_ratelimited(c, "%s", buf.buf);
277         else
278                 bch2_trans_inconsistent(trans, "%s", buf.buf);
279
280         printbuf_exit(&buf);
281 }
282
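/*
 * Look up the extent a backpointer points to, returning bkey_s_c_null if it
 * no longer matches.  For backpointers to btree nodes (bp.level != 0) the
 * node's key is returned via bch2_backpointer_get_node().
 */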
283 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
284                                          struct btree_iter *iter,
285                                          struct bpos bp_pos,
286                                          struct bch_backpointer bp,
287                                          unsigned iter_flags)
288 {
289         if (likely(!bp.level)) {
290                 struct bch_fs *c = trans->c;
291                 struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
292                 struct bkey_s_c k;
293
294                 bch2_trans_node_iter_init(trans, iter,
295                                           bp.btree_id,
296                                           bp.pos,
297                                           0, 0,
298                                           iter_flags);
299                 k = bch2_btree_iter_peek_slot(iter);
300                 if (bkey_err(k)) {
301                         bch2_trans_iter_exit(trans, iter);
302                         return k;
303                 }
304
305                 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
306                         return k;
307
308                 bch2_trans_iter_exit(trans, iter);
309                 backpointer_not_found(trans, bp_pos, bp, k);
310                 return bkey_s_c_null;
311         } else {
312                 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
313
314                 if (IS_ERR_OR_NULL(b)) {
315                         bch2_trans_iter_exit(trans, iter);
316                         return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
317                 }
318                 return bkey_i_to_s_c(&b->key);
319         }
320 }
321
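/*
 * Look up the btree node referenced by a backpointer with level > 0: returns
 * NULL if the node no longer matches the backpointer, or
 * -BCH_ERR_backpointer_to_overwritten_btree_node if the node found belongs to
 * an in-flight interior btree update.
 */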
322 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
323                                         struct btree_iter *iter,
324                                         struct bpos bp_pos,
325                                         struct bch_backpointer bp)
326 {
327         struct bch_fs *c = trans->c;
328         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
329         struct btree *b;
330
331         BUG_ON(!bp.level);
332
333         bch2_trans_node_iter_init(trans, iter,
334                                   bp.btree_id,
335                                   bp.pos,
336                                   0,
337                                   bp.level - 1,
338                                   0);
339         b = bch2_btree_iter_peek_node(iter);
340         if (IS_ERR_OR_NULL(b))
341                 goto err;
342
343         BUG_ON(b->c.level != bp.level - 1);
344
345         if (extent_matches_bp(c, bp.btree_id, bp.level,
346                               bkey_i_to_s_c(&b->key),
347                               bucket, bp))
348                 return b;
349
350         if (btree_node_will_make_reachable(b)) {
351                 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
352         } else {
353                 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
354                 b = NULL;
355         }
356 err:
357         bch2_trans_iter_exit(trans, iter);
358         return b;
359 }
360
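/*
 * fsck helper: delete backpointers that refer to a nonexistent device or to a
 * bucket with no alloc_v4 key.
 */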
361 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
362                                         struct bkey_s_c k)
363 {
364         struct bch_fs *c = trans->c;
365         struct btree_iter alloc_iter = { NULL };
366         struct bkey_s_c alloc_k;
367         struct printbuf buf = PRINTBUF;
368         int ret = 0;
369
370         if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
371                         backpointer_to_missing_device,
372                         "backpointer for missing device:\n%s",
373                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
374                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
375                 goto out;
376         }
377
378         alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
379                                      bp_pos_to_bucket(c, k.k->p), 0);
380         ret = bkey_err(alloc_k);
381         if (ret)
382                 goto out;
383
384         if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
385                         backpointer_to_missing_alloc,
386                         "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
387                         alloc_iter.pos.inode, alloc_iter.pos.offset,
388                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
389                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
390                 goto out;
391         }
392 out:
393 fsck_err:
394         bch2_trans_iter_exit(trans, &alloc_iter);
395         printbuf_exit(&buf);
396         return ret;
397 }
398
399 /* verify that every backpointer has a corresponding alloc key */
400 int bch2_check_btree_backpointers(struct bch_fs *c)
401 {
402         int ret = bch2_trans_run(c,
403                 for_each_btree_key_commit(trans, iter,
404                         BTREE_ID_backpointers, POS_MIN, 0, k,
405                         NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
406                   bch2_check_btree_backpointer(trans, &iter, k)));
407         bch_err_fn(c, ret);
408         return ret;
409 }
410
411 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
412 {
413         return bpos_eq(l.k->p, r.k->p) &&
414                 bkey_bytes(l.k) == bkey_bytes(r.k) &&
415                 !memcmp(l.v, r.v, bkey_val_bytes(l.k));
416 }
417
418 struct extents_to_bp_state {
419         struct bpos     bucket_start;
420         struct bpos     bucket_end;
421         struct bkey_buf last_flushed;
422 };
423
424 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
425                                struct bkey_s_c extent, unsigned dev)
426 {
427         struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
428         int ret = PTR_ERR_OR_ZERO(n);
429         if (ret)
430                 return ret;
431
432         bch2_bkey_drop_device(bkey_i_to_s(n), dev);
433         return bch2_btree_insert_trans(trans, btree, n, 0);
434 }
435
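/*
 * Two extents claim the same bucket space: read @extent's data from @dev and
 * verify its checksum.  Returns 1 if the checksum was bad and the pointer was
 * dropped, 0 if the checksum was good or could not be checked, negative on
 * error.
 */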
436 static int check_extent_checksum(struct btree_trans *trans,
437                                  enum btree_id btree, struct bkey_s_c extent,
438                                  enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
439 {
440         struct bch_fs *c = trans->c;
441         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
442         const union bch_extent_entry *entry;
443         struct extent_ptr_decoded p;
444         struct printbuf buf = PRINTBUF;
445         void *data_buf = NULL;
446         struct bio *bio = NULL;
447         size_t bytes;
448         int ret = 0;
449
450         if (bkey_is_btree_ptr(extent.k))
451                 return false;
452
453         bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
454                 if (p.ptr.dev == dev)
455                         goto found;
456         BUG();
457 found:
458         if (!p.crc.csum_type)
459                 return false;
460
461         bytes = p.crc.compressed_size << 9;
462
463         struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
464         if (!bch2_dev_get_ioref(ca, READ))
465                 return false;
466
467         data_buf = kvmalloc(bytes, GFP_KERNEL);
468         if (!data_buf) {
469                 ret = -ENOMEM;
470                 goto err;
471         }
472
473         bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
474         bio->bi_iter.bi_sector = p.ptr.offset;
475         bch2_bio_map(bio, data_buf, bytes);
476         ret = submit_bio_wait(bio);
477         if (ret)
478                 goto err;
479
480         prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
481         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
482         bch2_bkey_val_to_text(&buf, c, extent);
483         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
484         bch2_bkey_val_to_text(&buf, c, extent2);
485
486         struct nonce nonce = extent_nonce(extent.k->version, p.crc);
487         struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
488         if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
489                         c, dup_backpointer_to_bad_csum_extent,
490                         "%s", buf.buf))
491                 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
492 fsck_err:
493 err:
494         if (bio)
495                 bio_put(bio);
496         kvfree(data_buf);
497         percpu_ref_put(&ca->io_ref);
498         printbuf_exit(&buf);
499         return ret;
500 }
501
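/*
 * Verify that a backpointer exists for the given extent pointer.  If it is
 * missing or points at a different extent, first flush the btree write buffer
 * and retry; then repair by dropping whichever extent is bogus or by
 * inserting the missing backpointer.
 */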
502 static int check_bp_exists(struct btree_trans *trans,
503                            struct extents_to_bp_state *s,
504                            struct bpos bucket,
505                            struct bch_backpointer bp,
506                            struct bkey_s_c orig_k)
507 {
508         struct bch_fs *c = trans->c;
509         struct btree_iter bp_iter = {};
510         struct btree_iter other_extent_iter = {};
511         struct printbuf buf = PRINTBUF;
512         struct bkey_s_c bp_k;
513         struct bkey_buf tmp;
514         int ret;
515
516         bch2_bkey_buf_init(&tmp);
517
518         if (!bch2_dev_bucket_exists(c, bucket)) {
519                 prt_str(&buf, "extent for nonexistent device:bucket ");
520                 bch2_bpos_to_text(&buf, bucket);
521                 prt_str(&buf, "\n  ");
522                 bch2_bkey_val_to_text(&buf, c, orig_k);
523                 bch_err(c, "%s", buf.buf);
524                 return -BCH_ERR_fsck_repair_unimplemented;
525         }
526
527         if (bpos_lt(bucket, s->bucket_start) ||
528             bpos_gt(bucket, s->bucket_end))
529                 return 0;
530
531         bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
532                                   bucket_pos_to_bp(c, bucket, bp.bucket_offset),
533                                   0);
534         ret = bkey_err(bp_k);
535         if (ret)
536                 goto err;
537
538         if (bp_k.k->type != KEY_TYPE_backpointer ||
539             memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
540                 bch2_bkey_buf_reassemble(&tmp, c, orig_k);
541
542                 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
543                         if (bp.level) {
544                                 bch2_trans_unlock(trans);
545                                 bch2_btree_interior_updates_flush(c);
546                         }
547
548                         ret = bch2_btree_write_buffer_flush_sync(trans);
549                         if (ret)
550                                 goto err;
551
552                         bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
553                         ret = -BCH_ERR_transaction_restart_write_buffer_flush;
554                         goto out;
555                 }
556
557                 goto check_existing_bp;
558         }
559 out:
560 err:
561 fsck_err:
562         bch2_trans_iter_exit(trans, &other_extent_iter);
563         bch2_trans_iter_exit(trans, &bp_iter);
564         bch2_bkey_buf_exit(&tmp, c);
565         printbuf_exit(&buf);
566         return ret;
567 check_existing_bp:
568         /* Do we have a backpointer for a different extent? */
569         if (bp_k.k->type != KEY_TYPE_backpointer)
570                 goto missing;
571
572         struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
573
574         struct bkey_s_c other_extent =
575                 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
576         ret = bkey_err(other_extent);
577         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
578                 ret = 0;
579         if (ret)
580                 goto err;
581
582         if (!other_extent.k)
583                 goto missing;
584
585         if (bch2_extents_match(orig_k, other_extent)) {
586                 printbuf_reset(&buf);
587                 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
588                 bch2_bkey_val_to_text(&buf, c, orig_k);
589                 prt_str(&buf, "\n  ");
590                 bch2_bkey_val_to_text(&buf, c, other_extent);
591                 bch_err(c, "%s", buf.buf);
592
593                 if (other_extent.k->size <= orig_k.k->size) {
594                         ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
595                         if (ret)
596                                 goto err;
597                         goto out;
598                 } else {
599                         ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
600                         if (ret)
601                                 goto err;
602                         goto missing;
603                 }
604         }
605
606         ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
607         if (ret < 0)
608                 goto err;
609         if (ret) {
610                 ret = 0;
611                 goto missing;
612         }
613
614         ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
615         if (ret < 0)
616                 goto err;
617         if (ret) {
618                 ret = 0;
619                 goto out;
620         }
621
622         printbuf_reset(&buf);
623         prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
624         bch2_bkey_val_to_text(&buf, c, orig_k);
625         prt_str(&buf, "\n  ");
626         bch2_bkey_val_to_text(&buf, c, other_extent);
627         bch_err(c, "%s", buf.buf);
628         ret = -BCH_ERR_fsck_repair_unimplemented;
629         goto err;
630 missing:
631         printbuf_reset(&buf);
632         prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
633                bch2_btree_id_str(bp.btree_id), bp.level);
634         bch2_bkey_val_to_text(&buf, c, orig_k);
635         prt_printf(&buf, "\n  got:   ");
636         bch2_bkey_val_to_text(&buf, c, bp_k);
637
638         struct bkey_i_backpointer n_bp_k;
639         bkey_backpointer_init(&n_bp_k.k_i);
640         n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
641         n_bp_k.v = bp;
642         prt_printf(&buf, "\n  want:  ");
643         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
644
645         if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
646                 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
647
648         goto out;
649 }
650
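/*
 * Check that every non-cached pointer in @k has a matching backpointer.
 */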
651 static int check_extent_to_backpointers(struct btree_trans *trans,
652                                         struct extents_to_bp_state *s,
653                                         enum btree_id btree, unsigned level,
654                                         struct bkey_s_c k)
655 {
656         struct bch_fs *c = trans->c;
657         struct bkey_ptrs_c ptrs;
658         const union bch_extent_entry *entry;
659         struct extent_ptr_decoded p;
660         int ret;
661
662         ptrs = bch2_bkey_ptrs_c(k);
663         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
664                 struct bpos bucket_pos;
665                 struct bch_backpointer bp;
666
667                 if (p.ptr.cached)
668                         continue;
669
670                 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
671
672                 ret = check_bp_exists(trans, s, bucket_pos, bp, k);
673                 if (ret)
674                         return ret;
675         }
676
677         return 0;
678 }
679
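/*
 * Btree roots are not referenced by keys in other nodes, so their
 * backpointers are checked separately; *level is set to the root's level so
 * the caller can continue walking down from there.
 */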
680 static int check_btree_root_to_backpointers(struct btree_trans *trans,
681                                             struct extents_to_bp_state *s,
682                                             enum btree_id btree_id,
683                                             int *level)
684 {
685         struct bch_fs *c = trans->c;
686         struct btree_iter iter;
687         struct btree *b;
688         struct bkey_s_c k;
689         int ret;
690 retry:
691         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
692                                   0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
693         b = bch2_btree_iter_peek_node(&iter);
694         ret = PTR_ERR_OR_ZERO(b);
695         if (ret)
696                 goto err;
697
698         if (b != btree_node_root(c, b)) {
699                 bch2_trans_iter_exit(trans, &iter);
700                 goto retry;
701         }
702
703         *level = b->c.level;
704
705         k = bkey_i_to_s_c(&b->key);
706         ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
707 err:
708         bch2_trans_iter_exit(trans, &iter);
709         return ret;
710 }
711
712 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
713 {
714         return (struct bbpos) {
715                 .btree  = bp.btree_id,
716                 .pos    = bp.pos,
717         };
718 }
719
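/*
 * The backpointers passes run in chunks sized so that the btree nodes being
 * cross-checked can stay pinned in the btree node cache:
 */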
720 static u64 mem_may_pin_bytes(struct bch_fs *c)
721 {
722         struct sysinfo i;
723         si_meminfo(&i);
724
725         u64 mem_bytes = i.totalram * i.mem_unit;
726         return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
727 }
728
729 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
730 {
731         return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
732 }
733
734 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
735                                         u64 btree_leaf_mask,
736                                         u64 btree_interior_mask,
737                                         struct bbpos start, struct bbpos *end)
738 {
739         struct bch_fs *c = trans->c;
740         s64 mem_may_pin = mem_may_pin_bytes(c);
741         int ret = 0;
742
743         btree_interior_mask |= btree_leaf_mask;
744
745         c->btree_cache.pinned_nodes_leaf_mask           = btree_leaf_mask;
746         c->btree_cache.pinned_nodes_interior_mask       = btree_interior_mask;
747         c->btree_cache.pinned_nodes_start               = start;
748         c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;
749
750         for (enum btree_id btree = start.btree;
751              btree < BTREE_ID_NR && !ret;
752              btree++) {
753                 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
754                 struct btree_iter iter;
755                 struct btree *b;
756
757                 if (!((1U << btree) & btree_leaf_mask) &&
758                     !((1U << btree) & btree_interior_mask))
759                         continue;
760
761                 __for_each_btree_node(trans, iter, btree,
762                                       btree == start.btree ? start.pos : POS_MIN,
763                                       0, depth, BTREE_ITER_PREFETCH, b, ret) {
764                         mem_may_pin -= btree_buf_bytes(b);
765                         if (mem_may_pin <= 0) {
766                                 c->btree_cache.pinned_nodes_end = *end =
767                                         BBPOS(btree, b->key.k.p);
768                                 bch2_trans_iter_exit(trans, &iter);
769                                 return 0;
770                         }
771                 }
772                 bch2_trans_iter_exit(trans, &iter);
773         }
774
775         return ret;
776 }
777
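/*
 * One pass over a single bucket range: walk each btree from the root down,
 * checking backpointers for every btree node pointer, and for extent keys in
 * btrees that contain them.
 */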
778 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
779                                                    struct extents_to_bp_state *s)
780 {
781         struct bch_fs *c = trans->c;
782         int ret = 0;
783
784         for (enum btree_id btree_id = 0;
785              btree_id < btree_id_nr_alive(c);
786              btree_id++) {
787                 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
788
789                 ret = commit_do(trans, NULL, NULL,
790                                 BCH_TRANS_COMMIT_no_enospc,
791                                 check_btree_root_to_backpointers(trans, s, btree_id, &level));
792                 if (ret)
793                         return ret;
794
795                 while (level >= depth) {
796                         struct btree_iter iter;
797                         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
798                                                   level,
799                                                   BTREE_ITER_PREFETCH);
800                         while (1) {
801                                 bch2_trans_begin(trans);
802
803                                 struct bkey_s_c k = bch2_btree_iter_peek(&iter);
804                                 if (!k.k)
805                                         break;
806                                 ret = bkey_err(k) ?:
807                                         check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
808                                         bch2_trans_commit(trans, NULL, NULL,
809                                                           BCH_TRANS_COMMIT_no_enospc);
810                                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
811                                         ret = 0;
812                                         continue;
813                                 }
814                                 if (ret)
815                                         break;
816                                 if (bpos_eq(iter.pos, SPOS_MAX))
817                                         break;
818                                 bch2_btree_iter_advance(&iter);
819                         }
820                         bch2_trans_iter_exit(trans, &iter);
821
822                         if (ret)
823                                 return ret;
824
825                         --level;
826                 }
827         }
828
829         return 0;
830 }
831
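/*
 * Check that every extent and btree node pointer has a matching backpointer,
 * in as many passes as are needed for the backpointers btree to fit in the
 * memory budget.
 */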
832 int bch2_check_extents_to_backpointers(struct bch_fs *c)
833 {
834         struct btree_trans *trans = bch2_trans_get(c);
835         struct extents_to_bp_state s = { .bucket_start = POS_MIN };
836         int ret;
837
838         bch2_bkey_buf_init(&s.last_flushed);
839         bkey_init(&s.last_flushed.k->k);
840
841         while (1) {
842                 struct bbpos end;
843                 ret = bch2_get_btree_in_memory_pos(trans,
844                                 BIT_ULL(BTREE_ID_backpointers),
845                                 BIT_ULL(BTREE_ID_backpointers),
846                                 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
847                 if (ret)
848                         break;
849
850                 s.bucket_end = end.pos;
851
852                 if ( bpos_eq(s.bucket_start, POS_MIN) &&
853                     !bpos_eq(s.bucket_end, SPOS_MAX))
854                         bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
855                                     __func__, btree_nodes_fit_in_ram(c));
856
857                 if (!bpos_eq(s.bucket_start, POS_MIN) ||
858                     !bpos_eq(s.bucket_end, SPOS_MAX)) {
859                         struct printbuf buf = PRINTBUF;
860
861                         prt_str(&buf, "check_extents_to_backpointers(): ");
862                         bch2_bpos_to_text(&buf, s.bucket_start);
863                         prt_str(&buf, "-");
864                         bch2_bpos_to_text(&buf, s.bucket_end);
865
866                         bch_verbose(c, "%s", buf.buf);
867                         printbuf_exit(&buf);
868                 }
869
870                 ret = bch2_check_extents_to_backpointers_pass(trans, &s);
871                 if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
872                         break;
873
874                 s.bucket_start = bpos_successor(s.bucket_end);
875         }
876         bch2_trans_put(trans);
877         bch2_bkey_buf_exit(&s.last_flushed, c);
878
879         c->btree_cache.pinned_nodes_leaf_mask = 0;
880         c->btree_cache.pinned_nodes_interior_mask = 0;
881
882         bch_err_fn(c, ret);
883         return ret;
884 }
885
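/*
 * Check a single backpointer within [start, end]: verify that the extent or
 * btree node it refers to exists, flushing the btree write buffer once before
 * deleting a backpointer that appears to dangle.
 */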
886 static int check_one_backpointer(struct btree_trans *trans,
887                                  struct bbpos start,
888                                  struct bbpos end,
889                                  struct bkey_s_c_backpointer bp,
890                                  struct bpos *last_flushed_pos)
891 {
892         struct bch_fs *c = trans->c;
893         struct btree_iter iter;
894         struct bbpos pos = bp_to_bbpos(*bp.v);
895         struct bkey_s_c k;
896         struct printbuf buf = PRINTBUF;
897         int ret;
898
899         if (bbpos_cmp(pos, start) < 0 ||
900             bbpos_cmp(pos, end) > 0)
901                 return 0;
902
903         k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
904         ret = bkey_err(k);
905         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
906                 return 0;
907         if (ret)
908                 return ret;
909
910         if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
911                 *last_flushed_pos = bp.k->p;
912                 ret = bch2_btree_write_buffer_flush_sync(trans) ?:
913                         -BCH_ERR_transaction_restart_write_buffer_flush;
914                 goto out;
915         }
916
917         if (fsck_err_on(!k.k, c,
918                         backpointer_to_missing_ptr,
919                         "backpointer for missing %s\n  %s",
920                         bp.v->level ? "btree node" : "extent",
921                         (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
922                 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
923                 goto out;
924         }
925 out:
926 fsck_err:
927         bch2_trans_iter_exit(trans, &iter);
928         printbuf_exit(&buf);
929         return ret;
930 }
931
932 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
933                                                    struct bbpos start,
934                                                    struct bbpos end)
935 {
936         struct bpos last_flushed_pos = SPOS_MAX;
937
938         return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
939                                   POS_MIN, BTREE_ITER_PREFETCH, k,
940                                   NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
941                 check_one_backpointer(trans, start, end,
942                                       bkey_s_c_to_backpointer(k),
943                                       &last_flushed_pos));
944 }
945
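/*
 * The reverse pass: check that every backpointer points to an existing extent
 * or btree node, chunked so that the extents and reflink btrees fit in the
 * memory budget.
 */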
946 int bch2_check_backpointers_to_extents(struct bch_fs *c)
947 {
948         struct btree_trans *trans = bch2_trans_get(c);
949         struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
950         int ret;
951
952         while (1) {
953                 ret = bch2_get_btree_in_memory_pos(trans,
954                                                    (1U << BTREE_ID_extents)|
955                                                    (1U << BTREE_ID_reflink),
956                                                    ~0,
957                                                    start, &end);
958                 if (ret)
959                         break;
960
961                 if (!bbpos_cmp(start, BBPOS_MIN) &&
962                     bbpos_cmp(end, BBPOS_MAX))
963                         bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
964                                     __func__, btree_nodes_fit_in_ram(c));
965
966                 if (bbpos_cmp(start, BBPOS_MIN) ||
967                     bbpos_cmp(end, BBPOS_MAX)) {
968                         struct printbuf buf = PRINTBUF;
969
970                         prt_str(&buf, "check_backpointers_to_extents(): ");
971                         bch2_bbpos_to_text(&buf, start);
972                         prt_str(&buf, "-");
973                         bch2_bbpos_to_text(&buf, end);
974
975                         bch_verbose(c, "%s", buf.buf);
976                         printbuf_exit(&buf);
977                 }
978
979                 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
980                 if (ret || !bbpos_cmp(end, BBPOS_MAX))
981                         break;
982
983                 start = bbpos_successor(end);
984         }
985         bch2_trans_put(trans);
986
987         c->btree_cache.pinned_nodes_leaf_mask = 0;
988         c->btree_cache.pinned_nodes_interior_mask = 0;
989
990         bch_err_fn(c, ret);
991         return ret;
992 }