GNU Linux-libre 6.8.7-gnu
fs/bcachefs/backpointers.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "error.h"
12
13 #include <linux/mm.h>
14
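/*
 * Does one of @k's non-cached pointers generate exactly this @bucket and
 * backpointer @bp?  I.e., does @bp really point back to @k?
 */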
15 static bool extent_matches_bp(struct bch_fs *c,
16                               enum btree_id btree_id, unsigned level,
17                               struct bkey_s_c k,
18                               struct bpos bucket,
19                               struct bch_backpointer bp)
20 {
21         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
22         const union bch_extent_entry *entry;
23         struct extent_ptr_decoded p;
24
25         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
26                 struct bpos bucket2;
27                 struct bch_backpointer bp2;
28
29                 if (p.ptr.cached)
30                         continue;
31
32                 bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
33                                       &bucket2, &bp2);
34                 if (bpos_eq(bucket, bucket2) &&
35                     !memcmp(&bp, &bp2, sizeof(bp)))
36                         return true;
37         }
38
39         return false;
40 }
41
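/*
 * Validate a KEY_TYPE_backpointer key: its position must be consistent with
 * the bucket it lives in (bp_pos_to_bucket() and bucket_pos_to_bp() must
 * round trip):
 */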
42 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
43                              enum bkey_invalid_flags flags,
44                              struct printbuf *err)
45 {
46         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
47         struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
48         int ret = 0;
49
50         bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
51                          c, err,
52                          backpointer_pos_wrong,
53                          "backpointer at wrong pos");
54 fsck_err:
55         return ret;
56 }
57
58 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
59 {
60         prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
61                bch2_btree_id_str(bp->btree_id),
62                bp->level,
63                (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
64                (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
65                bp->bucket_len);
66         bch2_bpos_to_text(out, bp->pos);
67 }
68
69 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
70 {
71         if (bch2_dev_exists2(c, k.k->p.inode)) {
72                 prt_str(out, "bucket=");
73                 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
74                 prt_str(out, " ");
75         }
76
77         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
78 }
79
80 void bch2_backpointer_swab(struct bkey_s k)
81 {
82         struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
83
84         bp.v->bucket_offset     = swab40(bp.v->bucket_offset);
85         bp.v->bucket_len        = swab32(bp.v->bucket_len);
86         bch2_bpos_swab(&bp.v->pos);
87 }
88
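/*
 * Log an unexpected existing backpointer (on insert) or a missing/mismatched
 * one (on delete).  This only becomes a fatal inconsistency once the
 * check_extents_to_backpointers recovery pass has run; before that we return
 * 0 and let fsck repair it:
 */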
89 static noinline int backpointer_mod_err(struct btree_trans *trans,
90                                         struct bch_backpointer bp,
91                                         struct bkey_s_c bp_k,
92                                         struct bkey_s_c orig_k,
93                                         bool insert)
94 {
95         struct bch_fs *c = trans->c;
96         struct printbuf buf = PRINTBUF;
97
98         if (insert) {
99                 prt_printf(&buf, "existing backpointer found when inserting ");
100                 bch2_backpointer_to_text(&buf, &bp);
101                 prt_newline(&buf);
102                 printbuf_indent_add(&buf, 2);
103
104                 prt_printf(&buf, "found ");
105                 bch2_bkey_val_to_text(&buf, c, bp_k);
106                 prt_newline(&buf);
107
108                 prt_printf(&buf, "for ");
109                 bch2_bkey_val_to_text(&buf, c, orig_k);
110
111                 bch_err(c, "%s", buf.buf);
112         } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
113                 prt_printf(&buf, "backpointer not found when deleting");
114                 prt_newline(&buf);
115                 printbuf_indent_add(&buf, 2);
116
117                 prt_printf(&buf, "searching for ");
118                 bch2_backpointer_to_text(&buf, &bp);
119                 prt_newline(&buf);
120
121                 prt_printf(&buf, "got ");
122                 bch2_bkey_val_to_text(&buf, c, bp_k);
123                 prt_newline(&buf);
124
125                 prt_printf(&buf, "for ");
126                 bch2_bkey_val_to_text(&buf, c, orig_k);
127
128                 bch_err(c, "%s", buf.buf);
129         }
130
131         printbuf_exit(&buf);
132
133         if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
134                 bch2_inconsistent_error(c);
135                 return -EIO;
136         } else {
137                 return 0;
138         }
139 }
140
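/*
 * Insert or delete a backpointer key directly, verifying that the slot's
 * current contents are what we expect.  This is the fallback path for when
 * backpointer updates aren't going through the btree write buffer:
 */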
141 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
142                                 struct bpos bucket,
143                                 struct bch_backpointer bp,
144                                 struct bkey_s_c orig_k,
145                                 bool insert)
146 {
147         struct btree_iter bp_iter;
148         struct bkey_s_c k;
149         struct bkey_i_backpointer *bp_k;
150         int ret;
151
152         bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
153         ret = PTR_ERR_OR_ZERO(bp_k);
154         if (ret)
155                 return ret;
156
157         bkey_backpointer_init(&bp_k->k_i);
158         bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
159         bp_k->v = bp;
160
161         if (!insert) {
162                 bp_k->k.type = KEY_TYPE_deleted;
163                 set_bkey_val_u64s(&bp_k->k, 0);
164         }
165
166         k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
167                                bp_k->k.p,
168                                BTREE_ITER_INTENT|
169                                BTREE_ITER_SLOTS|
170                                BTREE_ITER_WITH_UPDATES);
171         ret = bkey_err(k);
172         if (ret)
173                 goto err;
174
175         if (insert
176             ? k.k->type
177             : (k.k->type != KEY_TYPE_backpointer ||
178                memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
179                 ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
180                 if (ret)
181                         goto err;
182         }
183
184         ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
185 err:
186         bch2_trans_iter_exit(trans, &bp_iter);
187         return ret;
188 }
189
190 /*
191  * Find the next backpointer >= *bp_pos:
192  */
193 int bch2_get_next_backpointer(struct btree_trans *trans,
194                               struct bpos bucket, int gen,
195                               struct bpos *bp_pos,
196                               struct bch_backpointer *bp,
197                               unsigned iter_flags)
198 {
199         struct bch_fs *c = trans->c;
200         struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
201         struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
202         struct bkey_s_c k;
203         int ret = 0;
204
205         if (bpos_ge(*bp_pos, bp_end_pos))
206                 goto done;
207
208         if (gen >= 0) {
209                 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
210                                        bucket, BTREE_ITER_CACHED|iter_flags);
211                 ret = bkey_err(k);
212                 if (ret)
213                         goto out;
214
215                 if (k.k->type != KEY_TYPE_alloc_v4 ||
216                     bkey_s_c_to_alloc_v4(k).v->gen != gen)
217                         goto done;
218         }
219
220         *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
221
222         for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
223                                      *bp_pos, iter_flags, k, ret) {
224                 if (bpos_ge(k.k->p, bp_end_pos))
225                         break;
226
227                 *bp_pos = k.k->p;
228                 *bp = *bkey_s_c_to_backpointer(k).v;
229                 goto out;
230         }
231 done:
232         *bp_pos = SPOS_MAX;
233 out:
234         bch2_trans_iter_exit(trans, &bp_iter);
235         bch2_trans_iter_exit(trans, &alloc_iter);
236         return ret;
237 }
238
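/*
 * Complain about a backpointer whose target doesn't point back to it.  When
 * backpointers go through the btree write buffer the backpointer we read may
 * simply be stale, so this is a no-op unless the write buffer is disabled:
 */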
239 static void backpointer_not_found(struct btree_trans *trans,
240                                   struct bpos bp_pos,
241                                   struct bch_backpointer bp,
242                                   struct bkey_s_c k)
243 {
244         struct bch_fs *c = trans->c;
245         struct printbuf buf = PRINTBUF;
246         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
247
248         /*
249          * If we're using the btree write buffer, the backpointer we were
250          * looking at may have already been deleted - failure to find what it
251          * pointed to is not an error:
252          */
253         if (likely(!bch2_backpointers_no_use_write_buffer))
254                 return;
255
256         prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
257                    bp.level ? "btree node" : "extent");
258         prt_printf(&buf, "bucket: ");
259         bch2_bpos_to_text(&buf, bucket);
260         prt_printf(&buf, "\n  ");
261
262         prt_printf(&buf, "backpointer pos: ");
263         bch2_bpos_to_text(&buf, bp_pos);
264         prt_printf(&buf, "\n  ");
265
266         bch2_backpointer_to_text(&buf, &bp);
267         prt_printf(&buf, "\n  ");
268         bch2_bkey_val_to_text(&buf, c, k);
269         if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
270                 bch_err_ratelimited(c, "%s", buf.buf);
271         else
272                 bch2_trans_inconsistent(trans, "%s", buf.buf);
273
274         printbuf_exit(&buf);
275 }
276
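/*
 * Return the extent @bp points to, with @iter pointing at it - or, for
 * backpointers to btree nodes, the node's key.  Returns bkey_s_c_null if the
 * target no longer matches the backpointer:
 */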
277 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
278                                          struct btree_iter *iter,
279                                          struct bpos bp_pos,
280                                          struct bch_backpointer bp,
281                                          unsigned iter_flags)
282 {
283         if (likely(!bp.level)) {
284                 struct bch_fs *c = trans->c;
285                 struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
286                 struct bkey_s_c k;
287
288                 bch2_trans_node_iter_init(trans, iter,
289                                           bp.btree_id,
290                                           bp.pos,
291                                           0, 0,
292                                           iter_flags);
293                 k = bch2_btree_iter_peek_slot(iter);
294                 if (bkey_err(k)) {
295                         bch2_trans_iter_exit(trans, iter);
296                         return k;
297                 }
298
299                 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
300                         return k;
301
302                 bch2_trans_iter_exit(trans, iter);
303                 backpointer_not_found(trans, bp_pos, bp, k);
304                 return bkey_s_c_null;
305         } else {
306                 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
307
308                 if (IS_ERR_OR_NULL(b)) {
309                         bch2_trans_iter_exit(trans, iter);
310                         return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
311                 }
312                 return bkey_i_to_s_c(&b->key);
313         }
314 }
315
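/*
 * For a backpointer to a btree node (bp.level != 0): return the node it
 * points to, NULL if the node no longer matches the backpointer, or
 * ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node) if we raced with a
 * node that's being replaced:
 */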
316 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
317                                         struct btree_iter *iter,
318                                         struct bpos bp_pos,
319                                         struct bch_backpointer bp)
320 {
321         struct bch_fs *c = trans->c;
322         struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
323         struct btree *b;
324
325         BUG_ON(!bp.level);
326
327         bch2_trans_node_iter_init(trans, iter,
328                                   bp.btree_id,
329                                   bp.pos,
330                                   0,
331                                   bp.level - 1,
332                                   0);
333         b = bch2_btree_iter_peek_node(iter);
334         if (IS_ERR_OR_NULL(b))
335                 goto err;
336
337         BUG_ON(b->c.level != bp.level - 1);
338
339         if (extent_matches_bp(c, bp.btree_id, bp.level,
340                               bkey_i_to_s_c(&b->key),
341                               bucket, bp))
342                 return b;
343
344         if (btree_node_will_make_reachable(b)) {
345                 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
346         } else {
347                 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
348                 b = NULL;
349         }
350 err:
351         bch2_trans_iter_exit(trans, iter);
352         return b;
353 }
354
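/*
 * Check a single backpointer key: delete it if it's for a nonexistent device
 * or points into a bucket with no alloc_v4 key:
 */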
355 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
356                                         struct bkey_s_c k)
357 {
358         struct bch_fs *c = trans->c;
359         struct btree_iter alloc_iter = { NULL };
360         struct bkey_s_c alloc_k;
361         struct printbuf buf = PRINTBUF;
362         int ret = 0;
363
364         if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
365                         backpointer_to_missing_device,
366                         "backpointer for missing device:\n%s",
367                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
368                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
369                 goto out;
370         }
371
372         alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
373                                      bp_pos_to_bucket(c, k.k->p), 0);
374         ret = bkey_err(alloc_k);
375         if (ret)
376                 goto out;
377
378         if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
379                         backpointer_to_missing_alloc,
380                         "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
381                         alloc_iter.pos.inode, alloc_iter.pos.offset,
382                         (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
383                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
384                 goto out;
385         }
386 out:
387 fsck_err:
388         bch2_trans_iter_exit(trans, &alloc_iter);
389         printbuf_exit(&buf);
390         return ret;
391 }
392
393 /* verify that every backpointer has a corresponding alloc key */
394 int bch2_check_btree_backpointers(struct bch_fs *c)
395 {
396         int ret = bch2_trans_run(c,
397                 for_each_btree_key_commit(trans, iter,
398                         BTREE_ID_backpointers, POS_MIN, 0, k,
399                         NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
400                   bch2_check_btree_backpointer(trans, &iter, k)));
401         bch_err_fn(c, ret);
402         return ret;
403 }
404
405 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
406 {
407         return bpos_eq(l.k->p, r.k->p) &&
408                 bkey_bytes(l.k) == bkey_bytes(r.k) &&
409                 !memcmp(l.v, r.v, bkey_val_bytes(l.k));
410 }
411
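/*
 * State for one extents -> backpointers pass: the range of buckets being
 * checked this pass, and the last extent we triggered a write buffer flush
 * for (so we don't flush repeatedly for the same key):
 */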
412 struct extents_to_bp_state {
413         struct bpos     bucket_start;
414         struct bpos     bucket_end;
415         struct bkey_buf last_flushed;
416 };
417
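/*
 * Check that the backpointer for @orig_k's pointer into @bucket exists, if
 * @bucket falls within the range covered by this pass.  On a mismatch, flush
 * the write buffer (and pending interior node updates, for btree node
 * pointers) and retry once before reporting the backpointer as missing and
 * recreating it:
 */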
418 static int check_bp_exists(struct btree_trans *trans,
419                            struct extents_to_bp_state *s,
420                            struct bpos bucket,
421                            struct bch_backpointer bp,
422                            struct bkey_s_c orig_k)
423 {
424         struct bch_fs *c = trans->c;
425         struct btree_iter bp_iter = { NULL };
426         struct printbuf buf = PRINTBUF;
427         struct bkey_s_c bp_k;
428         struct bkey_buf tmp;
429         int ret;
430
431         bch2_bkey_buf_init(&tmp);
432
433         if (bpos_lt(bucket, s->bucket_start) ||
434             bpos_gt(bucket, s->bucket_end))
435                 return 0;
436
437         if (!bch2_dev_bucket_exists(c, bucket))
438                 goto missing;
439
440         bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
441                                   bucket_pos_to_bp(c, bucket, bp.bucket_offset),
442                                   0);
443         ret = bkey_err(bp_k);
444         if (ret)
445                 goto err;
446
447         if (bp_k.k->type != KEY_TYPE_backpointer ||
448             memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
449                 bch2_bkey_buf_reassemble(&tmp, c, orig_k);
450
451                 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
452                         if (bp.level) {
453                                 bch2_trans_unlock(trans);
454                                 bch2_btree_interior_updates_flush(c);
455                         }
456
457                         ret = bch2_btree_write_buffer_flush_sync(trans);
458                         if (ret)
459                                 goto err;
460
461                         bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
462                         ret = -BCH_ERR_transaction_restart_write_buffer_flush;
463                         goto out;
464                 }
465                 goto missing;
466         }
467 out:
468 err:
469 fsck_err:
470         bch2_trans_iter_exit(trans, &bp_iter);
471         bch2_bkey_buf_exit(&tmp, c);
472         printbuf_exit(&buf);
473         return ret;
474 missing:
475         prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
476                bch2_btree_id_str(bp.btree_id), bp.level);
477         bch2_bkey_val_to_text(&buf, c, orig_k);
478         prt_printf(&buf, "\nbp pos ");
479         bch2_bpos_to_text(&buf, bp_iter.pos);
480
481         if (c->opts.reconstruct_alloc ||
482             fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
483                 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
484
485         goto out;
486 }
487
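/*
 * Check that a backpointer exists for every non-cached pointer in @k:
 */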
488 static int check_extent_to_backpointers(struct btree_trans *trans,
489                                         struct extents_to_bp_state *s,
490                                         enum btree_id btree, unsigned level,
491                                         struct bkey_s_c k)
492 {
493         struct bch_fs *c = trans->c;
494         struct bkey_ptrs_c ptrs;
495         const union bch_extent_entry *entry;
496         struct extent_ptr_decoded p;
497         int ret;
498
499         ptrs = bch2_bkey_ptrs_c(k);
500         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
501                 struct bpos bucket_pos;
502                 struct bch_backpointer bp;
503
504                 if (p.ptr.cached)
505                         continue;
506
507                 bch2_extent_ptr_to_bp(c, btree, level,
508                                       k, p, &bucket_pos, &bp);
509
510                 ret = check_bp_exists(trans, s, bucket_pos, bp, k);
511                 if (ret)
512                         return ret;
513         }
514
515         return 0;
516 }
517
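/*
 * Check backpointers for the btree root's key, retrying if the root changed
 * underneath us; the root's level is returned via @level so the caller knows
 * where to start walking the btree:
 */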
518 static int check_btree_root_to_backpointers(struct btree_trans *trans,
519                                             struct extents_to_bp_state *s,
520                                             enum btree_id btree_id,
521                                             int *level)
522 {
523         struct bch_fs *c = trans->c;
524         struct btree_iter iter;
525         struct btree *b;
526         struct bkey_s_c k;
527         int ret;
528 retry:
529         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
530                                   0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
531         b = bch2_btree_iter_peek_node(&iter);
532         ret = PTR_ERR_OR_ZERO(b);
533         if (ret)
534                 goto err;
535
536         if (b != btree_node_root(c, b)) {
537                 bch2_trans_iter_exit(trans, &iter);
538                 goto retry;
539         }
540
541         *level = b->c.level;
542
543         k = bkey_i_to_s_c(&b->key);
544         ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
545 err:
546         bch2_trans_iter_exit(trans, &iter);
547         return ret;
548 }
549
550 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
551 {
552         return (struct bbpos) {
553                 .btree  = bp.btree_id,
554                 .pos    = bp.pos,
555         };
556 }
557
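/*
 * How many btree nodes we're willing to process per pass: half of system
 * memory divided by the btree node size:
 */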
558 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
559 {
560         struct sysinfo i;
561         u64 mem_bytes;
562
563         si_meminfo(&i);
564         mem_bytes = i.totalram * i.mem_unit;
565         return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
566 }
567
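/*
 * Starting from @start, walk the btrees selected by the leaf/interior masks
 * and return in *@end the position at which we'd exceed the number of btree
 * nodes that fit in memory, so the caller can run its check in multiple
 * passes:
 */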
568 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
569                                         unsigned btree_leaf_mask,
570                                         unsigned btree_interior_mask,
571                                         struct bbpos start, struct bbpos *end)
572 {
573         struct btree_iter iter;
574         struct bkey_s_c k;
575         size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
576         enum btree_id btree;
577         int ret = 0;
578
579         for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
580                 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
581
582                 if (!((1U << btree) & btree_leaf_mask) &&
583                     !((1U << btree) & btree_interior_mask))
584                         continue;
585
586                 bch2_trans_node_iter_init(trans, &iter, btree,
587                                           btree == start.btree ? start.pos : POS_MIN,
588                                           0, depth, 0);
589                 /*
590                  * for_each_btree_key_continue() doesn't check the return value
591                  * from bch2_btree_iter_advance(), which is needed when
592                  * iterating over interior nodes where we'll see keys at
593                  * SPOS_MAX:
594                  */
595                 do {
596                         k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
597                         ret = bkey_err(k);
598                         if (!k.k || ret)
599                                 break;
600
601                         --btree_nodes;
602                         if (!btree_nodes) {
603                                 *end = BBPOS(btree, k.k->p);
604                                 bch2_trans_iter_exit(trans, &iter);
605                                 return 0;
606                         }
607                 } while (bch2_btree_iter_advance(&iter));
608                 bch2_trans_iter_exit(trans, &iter);
609         }
610
611         *end = BBPOS_MAX;
612         return ret;
613 }
614
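/*
 * One extents -> backpointers pass: for every btree, check the root key and
 * then walk each level down to the leaves (or to level 1 for btrees without
 * pointers), checking backpointers for every key:
 */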
615 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
616                                                    struct extents_to_bp_state *s)
617 {
618         struct bch_fs *c = trans->c;
619         int ret = 0;
620
621         for (enum btree_id btree_id = 0;
622              btree_id < btree_id_nr_alive(c);
623              btree_id++) {
624                 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
625
626                 ret = commit_do(trans, NULL, NULL,
627                                 BCH_TRANS_COMMIT_no_enospc,
628                                 check_btree_root_to_backpointers(trans, s, btree_id, &level));
629                 if (ret)
630                         return ret;
631
632                 while (level >= depth) {
633                         struct btree_iter iter;
634                         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
635                                                   level,
636                                                   BTREE_ITER_PREFETCH);
637                         while (1) {
638                                 bch2_trans_begin(trans);
639
640                                 struct bkey_s_c k = bch2_btree_iter_peek(&iter);
641                                 if (!k.k)
642                                         break;
643                                 ret = bkey_err(k) ?:
644                                         check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
645                                         bch2_trans_commit(trans, NULL, NULL,
646                                                           BCH_TRANS_COMMIT_no_enospc);
647                                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
648                                         ret = 0;
649                                         continue;
650                                 }
651                                 if (ret)
652                                         break;
653                                 if (bpos_eq(iter.pos, SPOS_MAX))
654                                         break;
655                                 bch2_btree_iter_advance(&iter);
656                         }
657                         bch2_trans_iter_exit(trans, &iter);
658
659                         if (ret)
660                                 return ret;
661
662                         --level;
663                 }
664         }
665
666         return 0;
667 }
668
669 static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
670                                          struct bpos bucket)
671 {
672         return bch2_dev_exists2(c, bucket.inode)
673                 ? bucket_pos_to_bp(c, bucket, 0)
674                 : bucket;
675 }
676
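/*
 * Walk the alloc and backpointers btrees in lockstep, one node at a time,
 * starting from @start, and return in *@end the bucket at which we'd exceed
 * the number of btree nodes that fit in memory:
 */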
677 static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
678                                         struct bpos start, struct bpos *end)
679 {
680         struct btree_iter alloc_iter;
681         struct btree_iter bp_iter;
682         struct bkey_s_c alloc_k, bp_k;
683         size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
684         bool alloc_end = false, bp_end = false;
685         int ret = 0;
686
687         bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
688                                   start, 0, 1, 0);
689         bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
690                                   bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
691         while (1) {
692                 alloc_k = !alloc_end
693                         ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
694                         : bkey_s_c_null;
695                 bp_k = !bp_end
696                         ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
697                         : bkey_s_c_null;
698
699                 ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
700                 if ((!alloc_k.k && !bp_k.k) || ret) {
701                         *end = SPOS_MAX;
702                         break;
703                 }
704
705                 --btree_nodes;
706                 if (!btree_nodes) {
707                         *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
708                         break;
709                 }
710
711                 if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
712                     bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
713                         if (!bch2_btree_iter_advance(&alloc_iter))
714                                 alloc_end = true;
715                 } else {
716                         if (!bch2_btree_iter_advance(&bp_iter))
717                                 bp_end = true;
718                 }
719         }
720         bch2_trans_iter_exit(trans, &bp_iter);
721         bch2_trans_iter_exit(trans, &alloc_iter);
722         return ret;
723 }
724
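/*
 * Check that every extent and btree node pointer has a backpointer, splitting
 * the work into multiple passes if the alloc and backpointers btrees won't
 * fit in memory all at once:
 */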
725 int bch2_check_extents_to_backpointers(struct bch_fs *c)
726 {
727         struct btree_trans *trans = bch2_trans_get(c);
728         struct extents_to_bp_state s = { .bucket_start = POS_MIN };
729         int ret;
730
731         bch2_bkey_buf_init(&s.last_flushed);
732         bkey_init(&s.last_flushed.k->k);
733
734         while (1) {
735                 ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
736                 if (ret)
737                         break;
738
739                 if ( bpos_eq(s.bucket_start, POS_MIN) &&
740                     !bpos_eq(s.bucket_end, SPOS_MAX))
741                         bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
742                                     __func__, btree_nodes_fit_in_ram(c));
743
744                 if (!bpos_eq(s.bucket_start, POS_MIN) ||
745                     !bpos_eq(s.bucket_end, SPOS_MAX)) {
746                         struct printbuf buf = PRINTBUF;
747
748                         prt_str(&buf, "check_extents_to_backpointers(): ");
749                         bch2_bpos_to_text(&buf, s.bucket_start);
750                         prt_str(&buf, "-");
751                         bch2_bpos_to_text(&buf, s.bucket_end);
752
753                         bch_verbose(c, "%s", buf.buf);
754                         printbuf_exit(&buf);
755                 }
756
757                 ret = bch2_check_extents_to_backpointers_pass(trans, &s);
758                 if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
759                         break;
760
761                 s.bucket_start = bpos_successor(s.bucket_end);
762         }
763         bch2_trans_put(trans);
764         bch2_bkey_buf_exit(&s.last_flushed, c);
765
766         bch_err_fn(c, ret);
767         return ret;
768 }
769
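/*
 * Check that a single backpointer points to a live extent or btree node; if
 * the target is missing, flush the write buffer and retry once before
 * deleting the backpointer:
 */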
770 static int check_one_backpointer(struct btree_trans *trans,
771                                  struct bbpos start,
772                                  struct bbpos end,
773                                  struct bkey_s_c_backpointer bp,
774                                  struct bpos *last_flushed_pos)
775 {
776         struct bch_fs *c = trans->c;
777         struct btree_iter iter;
778         struct bbpos pos = bp_to_bbpos(*bp.v);
779         struct bkey_s_c k;
780         struct printbuf buf = PRINTBUF;
781         int ret;
782
783         if (bbpos_cmp(pos, start) < 0 ||
784             bbpos_cmp(pos, end) > 0)
785                 return 0;
786
787         k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
788         ret = bkey_err(k);
789         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
790                 return 0;
791         if (ret)
792                 return ret;
793
794         if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
795                 *last_flushed_pos = bp.k->p;
796                 ret = bch2_btree_write_buffer_flush_sync(trans) ?:
797                         -BCH_ERR_transaction_restart_write_buffer_flush;
798                 goto out;
799         }
800
801         if (fsck_err_on(!k.k, c,
802                         backpointer_to_missing_ptr,
803                         "backpointer for missing %s\n  %s",
804                         bp.v->level ? "btree node" : "extent",
805                         (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
806                 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
807                 goto out;
808         }
809 out:
810 fsck_err:
811         bch2_trans_iter_exit(trans, &iter);
812         printbuf_exit(&buf);
813         return ret;
814 }
815
816 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
817                                                    struct bbpos start,
818                                                    struct bbpos end)
819 {
820         struct bpos last_flushed_pos = SPOS_MAX;
821
822         return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
823                                   POS_MIN, BTREE_ITER_PREFETCH, k,
824                                   NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
825                 check_one_backpointer(trans, start, end,
826                                       bkey_s_c_to_backpointer(k),
827                                       &last_flushed_pos));
828 }
829
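/*
 * Check that every backpointer points to a live extent or btree node,
 * splitting the work into multiple passes if the extents and reflink btrees
 * won't fit in memory all at once:
 */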
830 int bch2_check_backpointers_to_extents(struct bch_fs *c)
831 {
832         struct btree_trans *trans = bch2_trans_get(c);
833         struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
834         int ret;
835
836         while (1) {
837                 ret = bch2_get_btree_in_memory_pos(trans,
838                                                    (1U << BTREE_ID_extents)|
839                                                    (1U << BTREE_ID_reflink),
840                                                    ~0,
841                                                    start, &end);
842                 if (ret)
843                         break;
844
845                 if (!bbpos_cmp(start, BBPOS_MIN) &&
846                     bbpos_cmp(end, BBPOS_MAX))
847                         bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
848                                     __func__, btree_nodes_fit_in_ram(c));
849
850                 if (bbpos_cmp(start, BBPOS_MIN) ||
851                     bbpos_cmp(end, BBPOS_MAX)) {
852                         struct printbuf buf = PRINTBUF;
853
854                         prt_str(&buf, "check_backpointers_to_extents(): ");
855                         bch2_bbpos_to_text(&buf, start);
856                         prt_str(&buf, "-");
857                         bch2_bbpos_to_text(&buf, end);
858
859                         bch_verbose(c, "%s", buf.buf);
860                         printbuf_exit(&buf);
861                 }
862
863                 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
864                 if (ret || !bbpos_cmp(end, BBPOS_MAX))
865                         break;
866
867                 start = bbpos_successor(end);
868         }
869         bch2_trans_put(trans);
870
871         bch_err_fn(c, ret);
872         return ret;
873 }