GNU Linux-libre 6.0.2-gnu
fs/xfs/scrub/btree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
18
19 /* btree scrubbing */
20
21 /*
22  * Check for btree operation errors.  See the section about handling
23  * operational errors in common.c.
24  */
25 static bool
26 __xchk_btree_process_error(
27         struct xfs_scrub        *sc,
28         struct xfs_btree_cur    *cur,
29         int                     level,
30         int                     *error,
31         __u32                   errflag,
32         void                    *ret_ip)
33 {
34         if (*error == 0)
35                 return true;
36
37         switch (*error) {
38         case -EDEADLOCK:
39                 /* Used to restart an op with deadlock avoidance. */
40                 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
41                 break;
42         case -EFSBADCRC:
43         case -EFSCORRUPTED:
44                 /* Note the badness but don't abort. */
45                 sc->sm->sm_flags |= errflag;
46                 *error = 0;
47                 fallthrough;
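                    /*
                     * Fall through so the tracepoint below still records the
                     * level and return address where we noticed the corruption.
                     */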
48         default:
49                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
50                         trace_xchk_ifork_btree_op_error(sc, cur, level,
51                                         *error, ret_ip);
52                 else
53                         trace_xchk_btree_op_error(sc, cur, level,
54                                         *error, ret_ip);
55                 break;
56         }
57         return false;
58 }
59
60 bool
61 xchk_btree_process_error(
62         struct xfs_scrub        *sc,
63         struct xfs_btree_cur    *cur,
64         int                     level,
65         int                     *error)
66 {
67         return __xchk_btree_process_error(sc, cur, level, error,
68                         XFS_SCRUB_OFLAG_CORRUPT, __return_address);
69 }
70
71 bool
72 xchk_btree_xref_process_error(
73         struct xfs_scrub        *sc,
74         struct xfs_btree_cur    *cur,
75         int                     level,
76         int                     *error)
77 {
78         return __xchk_btree_process_error(sc, cur, level, error,
79                         XFS_SCRUB_OFLAG_XFAIL, __return_address);
80 }
81
82 /* Record btree block corruption. */
83 static void
84 __xchk_btree_set_corrupt(
85         struct xfs_scrub        *sc,
86         struct xfs_btree_cur    *cur,
87         int                     level,
88         __u32                   errflag,
89         void                    *ret_ip)
90 {
91         sc->sm->sm_flags |= errflag;
92
93         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
94                 trace_xchk_ifork_btree_error(sc, cur, level,
95                                 ret_ip);
96         else
97                 trace_xchk_btree_error(sc, cur, level,
98                                 ret_ip);
99 }
100
101 void
102 xchk_btree_set_corrupt(
103         struct xfs_scrub        *sc,
104         struct xfs_btree_cur    *cur,
105         int                     level)
106 {
107         __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
108                         __return_address);
109 }
110
111 void
112 xchk_btree_xref_set_corrupt(
113         struct xfs_scrub        *sc,
114         struct xfs_btree_cur    *cur,
115         int                     level)
116 {
117         __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
118                         __return_address);
119 }
120
121 /*
122  * Make sure this record is in order and doesn't stray outside of the parent
123  * keys.
124  */
125 STATIC void
126 xchk_btree_rec(
127         struct xchk_btree       *bs)
128 {
129         struct xfs_btree_cur    *cur = bs->cur;
130         union xfs_btree_rec     *rec;
131         union xfs_btree_key     key;
132         union xfs_btree_key     hkey;
133         union xfs_btree_key     *keyp;
134         struct xfs_btree_block  *block;
135         struct xfs_btree_block  *keyblock;
136         struct xfs_buf          *bp;
137
138         block = xfs_btree_get_block(cur, 0, &bp);
139         rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
140
141         trace_xchk_btree_rec(bs->sc, cur, 0);
142
143         /* If this isn't the first record, are they in order? */
144         if (cur->bc_levels[0].ptr > 1 &&
145             !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
146                 xchk_btree_set_corrupt(bs->sc, cur, 0);
147         memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
148
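            /* If the leaf is also the root, there are no parent keys to check against. */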
149         if (cur->bc_nlevels == 1)
150                 return;
151
152         /* Is this at least as large as the parent low key? */
153         cur->bc_ops->init_key_from_rec(&key, rec);
154         keyblock = xfs_btree_get_block(cur, 1, &bp);
155         keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
156         if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
157                 xchk_btree_set_corrupt(bs->sc, cur, 1);
158
159         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
160                 return;
161
162         /* Is this no larger than the parent high key? */
163         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
164         keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
165         if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
166                 xchk_btree_set_corrupt(bs->sc, cur, 1);
167 }
168
169 /*
170  * Make sure this key is in order and doesn't stray outside of the parent
171  * keys.
172  */
173 STATIC void
174 xchk_btree_key(
175         struct xchk_btree       *bs,
176         int                     level)
177 {
178         struct xfs_btree_cur    *cur = bs->cur;
179         union xfs_btree_key     *key;
180         union xfs_btree_key     *keyp;
181         struct xfs_btree_block  *block;
182         struct xfs_btree_block  *keyblock;
183         struct xfs_buf          *bp;
184
185         block = xfs_btree_get_block(cur, level, &bp);
186         key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
187
188         trace_xchk_btree_key(bs->sc, cur, level);
189
190         /* If this isn't the first key, are they in order? */
191         if (cur->bc_levels[level].ptr > 1 &&
192             !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key))
193                 xchk_btree_set_corrupt(bs->sc, cur, level);
194         memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len);
195
196         if (level + 1 >= cur->bc_nlevels)
197                 return;
198
199         /* Is this at least as large as the parent low key? */
200         keyblock = xfs_btree_get_block(cur, level + 1, &bp);
201         keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
202         if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
203                 xchk_btree_set_corrupt(bs->sc, cur, level);
204
205         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
206                 return;
207
208         /* Is this no larger than the parent high key? */
209         key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
210         keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
211                         keyblock);
212         if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
213                 xchk_btree_set_corrupt(bs->sc, cur, level);
214 }
215
216 /*
217  * Check a btree pointer.  Returns true if it's ok to use this pointer.
218  * Callers do not need to set the corrupt flag.
219  */
220 static bool
221 xchk_btree_ptr_ok(
222         struct xchk_btree       *bs,
223         int                     level,
224         union xfs_btree_ptr     *ptr)
225 {
226         bool                    res;
227
228         /* A btree rooted in an inode has no block pointer to the root. */
229         if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
230             level == bs->cur->bc_nlevels)
231                 return true;
232
233         /* Otherwise, check the pointers. */
234         if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
235                 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
236         else
237                 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
238         if (!res)
239                 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
240
241         return res;
242 }
243
244 /* Check that a btree block's sibling matches what we expect. */
245 STATIC int
246 xchk_btree_block_check_sibling(
247         struct xchk_btree       *bs,
248         int                     level,
249         int                     direction,
250         union xfs_btree_ptr     *sibling)
251 {
252         struct xfs_btree_cur    *cur = bs->cur;
253         struct xfs_btree_block  *pblock;
254         struct xfs_buf          *pbp;
255         struct xfs_btree_cur    *ncur = NULL;
256         union xfs_btree_ptr     *pp;
257         int                     success;
258         int                     error;
259
260         error = xfs_btree_dup_cursor(cur, &ncur);
261         if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
262             !ncur)
263                 return error;
264
265         /*
266          * If the pointer is null, we shouldn't be able to move the upper
267          * level pointer anywhere.
268          */
269         if (xfs_btree_ptr_is_null(cur, sibling)) {
270                 if (direction > 0)
271                         error = xfs_btree_increment(ncur, level + 1, &success);
272                 else
273                         error = xfs_btree_decrement(ncur, level + 1, &success);
274                 if (error == 0 && success)
275                         xchk_btree_set_corrupt(bs->sc, cur, level);
276                 error = 0;
277                 goto out;
278         }
279
280         /* Increment upper level pointer. */
281         if (direction > 0)
282                 error = xfs_btree_increment(ncur, level + 1, &success);
283         else
284                 error = xfs_btree_decrement(ncur, level + 1, &success);
285         if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
286                 goto out;
287         if (!success) {
288                 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
289                 goto out;
290         }
291
292         /* Compare upper level pointer to sibling pointer. */
293         pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
294         pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
295         if (!xchk_btree_ptr_ok(bs, level + 1, pp))
296                 goto out;
297         if (pbp)
298                 xchk_buffer_recheck(bs->sc, pbp);
299
300         if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
301                 xchk_btree_set_corrupt(bs->sc, cur, level);
302 out:
303         xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
304         return error;
305 }
306
307 /* Check the siblings of a btree block. */
308 STATIC int
309 xchk_btree_block_check_siblings(
310         struct xchk_btree       *bs,
311         struct xfs_btree_block  *block)
312 {
313         struct xfs_btree_cur    *cur = bs->cur;
314         union xfs_btree_ptr     leftsib;
315         union xfs_btree_ptr     rightsib;
316         int                     level;
317         int                     error = 0;
318
319         xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
320         xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
321         level = xfs_btree_get_level(block);
322
323         /* Root block should never have siblings. */
324         if (level == cur->bc_nlevels - 1) {
325                 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
326                     !xfs_btree_ptr_is_null(cur, &rightsib))
327                         xchk_btree_set_corrupt(bs->sc, cur, level);
328                 goto out;
329         }
330
331         /*
332          * Do the left and right sibling pointers match the adjacent
333          * parent level pointers?
334          * (These functions absorb error codes for us.)
335          */
336         error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
337         if (error)
338                 return error;
339         error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
340         if (error)
341                 return error;
342 out:
343         return error;
344 }
345
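/* A btree block whose ownership check was deferred until after the tree walk. */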
346 struct check_owner {
347         struct list_head        list;
348         xfs_daddr_t             daddr;
349         int                     level;
350 };
351
352 /*
353  * Make sure this btree block isn't in the free list and that there's
354  * an rmap record for it.
355  */
356 STATIC int
357 xchk_btree_check_block_owner(
358         struct xchk_btree       *bs,
359         int                     level,
360         xfs_daddr_t             daddr)
361 {
362         xfs_agnumber_t          agno;
363         xfs_agblock_t           agbno;
364         xfs_btnum_t             btnum;
365         bool                    init_sa;
366         int                     error = 0;
367
368         if (!bs->cur)
369                 return 0;
370
371         btnum = bs->cur->bc_btnum;
372         agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
373         agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
374
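            /*
             * Long-pointer btrees (e.g. the bmbt) are not rooted in a single
             * AG, so the scrub context has no AG headers attached; set them
             * up for the AG that owns this block before cross-referencing.
             */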
375         init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
376         if (init_sa) {
377                 error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
378                 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
379                                 level, &error))
380                         goto out_free;
381         }
382
383         xchk_xref_is_used_space(bs->sc, agbno, 1);
384         /*
385          * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
386          * have to nullify it (to shut down further block owner checks) if
387          * self-xref encounters problems.
388          */
389         if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
390                 bs->cur = NULL;
391
392         xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
393         if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
394                 bs->cur = NULL;
395
396 out_free:
397         if (init_sa)
398                 xchk_ag_free(bs->sc, &bs->sc->sa);
399
400         return error;
401 }
402
403 /* Check the owner of a btree block. */
404 STATIC int
405 xchk_btree_check_owner(
406         struct xchk_btree       *bs,
407         int                     level,
408         struct xfs_buf          *bp)
409 {
410         struct xfs_btree_cur    *cur = bs->cur;
411         struct check_owner      *co;
412
413         /*
414          * In theory, xfs_btree_get_block should only give us a null buffer
415          * pointer for the root of a root-in-inode btree type, but we need
416          * to check defensively here in case the cursor state is also screwed
417          * up.
418          */
419         if (bp == NULL) {
420                 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
421                         xchk_btree_set_corrupt(bs->sc, bs->cur, level);
422                 return 0;
423         }
424
425         /*
426          * We want to cross-reference each btree block with the bnobt
427          * and the rmapbt.  We cannot cross-reference the bnobt or
428          * rmapbt while scanning the bnobt or rmapbt, respectively,
429          * because we cannot alter the cursor and we'd prefer not to
430          * duplicate cursors.  Therefore, save the buffer daddr for
431          * later scanning.
432          */
433         if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
434                 co = kmem_alloc(sizeof(struct check_owner),
435                                 KM_MAYFAIL);
436                 if (!co)
437                         return -ENOMEM;
438                 co->level = level;
439                 co->daddr = xfs_buf_daddr(bp);
440                 list_add_tail(&co->list, &bs->to_check);
441                 return 0;
442         }
443
444         return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
445 }
446
447 /* Decide if we want to check minrecs of a btree block in the inode root. */
448 static inline bool
449 xchk_btree_check_iroot_minrecs(
450         struct xchk_btree       *bs)
451 {
452         /*
453          * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
454          * would miscalculate the space required for the data fork bmbt root
455          * when adding an attr fork, and promote the iroot contents to an
456          * external block unnecessarily.  This went unnoticed for many years
457          * until scrub found filesystems in this state.  Inode rooted btrees are
458          * not supposed to have immediate child blocks that are small enough
459          * that the contents could fit in the inode root, but we can't fail
460          * existing filesystems, so instead we disable the check for data fork
461          * bmap btrees when there's an attr fork.
462          */
463         if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
464             bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
465             xfs_inode_has_attr_fork(bs->sc->ip))
466                 return false;
467
468         return true;
469 }
470
471 /*
472  * Check that this btree block has at least minrecs records or is one of the
473  * special blocks that don't require that.
474  */
475 STATIC void
476 xchk_btree_check_minrecs(
477         struct xchk_btree       *bs,
478         int                     level,
479         struct xfs_btree_block  *block)
480 {
481         struct xfs_btree_cur    *cur = bs->cur;
482         unsigned int            root_level = cur->bc_nlevels - 1;
483         unsigned int            numrecs = be16_to_cpu(block->bb_numrecs);
484
485         /* At least minrecs records means the block is ok. */
486         if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
487                 return;
488
489         /*
490          * For btrees rooted in the inode, it's possible that the root block
491          * contents spilled into a regular ondisk block because there wasn't
492          * enough space in the inode root.  The number of records in that
493          * child block might be less than the standard minrecs, but that's ok
494          * provided that there's only one direct child of the root.
495          */
496         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
497             level == cur->bc_nlevels - 2) {
498                 struct xfs_btree_block  *root_block;
499                 struct xfs_buf          *root_bp;
500                 int                     root_maxrecs;
501
502                 root_block = xfs_btree_get_block(cur, root_level, &root_bp);
503                 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
504                 if (xchk_btree_check_iroot_minrecs(bs) &&
505                     (be16_to_cpu(root_block->bb_numrecs) != 1 ||
506                      numrecs <= root_maxrecs))
507                         xchk_btree_set_corrupt(bs->sc, cur, level);
508                 return;
509         }
510
511         /*
512          * Otherwise, only the root level is allowed to have fewer than minrecs
513          * records or keyptrs.
514          */
515         if (level < root_level)
516                 xchk_btree_set_corrupt(bs->sc, cur, level);
517 }
518
519 /*
520  * Grab and scrub a btree block given a btree pointer.  Returns block
521  * and buffer pointers (if applicable) if they're ok to use.
522  */
523 STATIC int
524 xchk_btree_get_block(
525         struct xchk_btree       *bs,
526         int                     level,
527         union xfs_btree_ptr     *pp,
528         struct xfs_btree_block  **pblock,
529         struct xfs_buf          **pbp)
530 {
531         xfs_failaddr_t          failed_at;
532         int                     error;
533
534         *pblock = NULL;
535         *pbp = NULL;
536
537         error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
538         if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
539             !*pblock)
540                 return error;
541
542         xfs_btree_get_block(bs->cur, level, pbp);
543         if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
544                 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
545                                 level, *pbp);
546         else
547                 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
548                                  level, *pbp);
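            /*
             * A failed header check is not an operational error; note the
             * corruption and return zero so the scrub can keep going.
             */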
549         if (failed_at) {
550                 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
551                 return 0;
552         }
553         if (*pbp)
554                 xchk_buffer_recheck(bs->sc, *pbp);
555
556         xchk_btree_check_minrecs(bs, level, *pblock);
557
558         /*
559          * Check the block's owner; this function absorbs error codes
560          * for us.
561          */
562         error = xchk_btree_check_owner(bs, level, *pbp);
563         if (error)
564                 return error;
565
566         /*
567          * Check the block's siblings; this function absorbs error codes
568          * for us.
569          */
570         return xchk_btree_block_check_siblings(bs, *pblock);
571 }
572
573 /*
574  * Check that the low and high keys of this block match the keys stored
575  * in the parent block.
576  */
577 STATIC void
578 xchk_btree_block_keys(
579         struct xchk_btree       *bs,
580         int                     level,
581         struct xfs_btree_block  *block)
582 {
583         union xfs_btree_key     block_keys;
584         struct xfs_btree_cur    *cur = bs->cur;
585         union xfs_btree_key     *high_bk;
586         union xfs_btree_key     *parent_keys;
587         union xfs_btree_key     *high_pk;
588         struct xfs_btree_block  *parent_block;
589         struct xfs_buf          *bp;
590
591         if (level >= cur->bc_nlevels - 1)
592                 return;
593
594         /* Calculate the keys for this block. */
595         xfs_btree_get_keys(cur, block, &block_keys);
596
597         /* Obtain the parent's copy of the keys for this block. */
598         parent_block = xfs_btree_get_block(cur, level + 1, &bp);
599         parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
600                         parent_block);
601
602         if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
603                 xchk_btree_set_corrupt(bs->sc, cur, 1);
604
605         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
606                 return;
607
608         /* Get high keys */
609         high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
610         high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
611                         parent_block);
612
613         if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
614                 xchk_btree_set_corrupt(bs->sc, cur, 1);
615 }
616
617 /*
618  * Visit all nodes and leaves of a btree.  Check that all pointers and
619  * records are in order, that the keys reflect the records, and use a callback
620  * so that the caller can verify individual records.
621  */
622 int
623 xchk_btree(
624         struct xfs_scrub                *sc,
625         struct xfs_btree_cur            *cur,
626         xchk_btree_rec_fn               scrub_fn,
627         const struct xfs_owner_info     *oinfo,
628         void                            *private)
629 {
630         union xfs_btree_ptr             ptr;
631         struct xchk_btree               *bs;
632         union xfs_btree_ptr             *pp;
633         union xfs_btree_rec             *recp;
634         struct xfs_btree_block          *block;
635         struct xfs_buf                  *bp;
636         struct check_owner              *co;
637         struct check_owner              *n;
638         size_t                          cur_sz;
639         int                             level;
640         int                             error = 0;
641
642         /*
643          * Allocate the btree scrub context from the heap, because this
644          * structure can get rather large.  Don't let a caller feed us a
645          * totally absurd size.
646          */
647         cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
648         if (cur_sz > PAGE_SIZE) {
649                 xchk_btree_set_corrupt(sc, cur, 0);
650                 return 0;
651         }
652         bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL);
653         if (!bs)
654                 return -ENOMEM;
655         bs->cur = cur;
656         bs->scrub_rec = scrub_fn;
657         bs->oinfo = oinfo;
658         bs->private = private;
659         bs->sc = sc;
660
661         /* Initialize scrub state */
662         INIT_LIST_HEAD(&bs->to_check);
663
664         /*
665          * Load the root of the btree.  The helper function absorbs
666          * error codes for us.
667          */
668         level = cur->bc_nlevels - 1;
669         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
670         if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
671                 goto out;
672         error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
673         if (error || !block)
674                 goto out;
675
676         cur->bc_levels[level].ptr = 1;
677
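            /*
             * Walk the btree iteratively.  bc_levels[level].ptr tracks which
             * record or keyptr we're looking at in each level; we descend
             * through keyptrs to the leaves and pop back up a level whenever
             * a block runs out of entries.
             */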
678         while (level < cur->bc_nlevels) {
679                 block = xfs_btree_get_block(cur, level, &bp);
680
681                 if (level == 0) {
682                         /* End of leaf, pop back towards the root. */
683                         if (cur->bc_levels[level].ptr >
684                             be16_to_cpu(block->bb_numrecs)) {
685                                 xchk_btree_block_keys(bs, level, block);
686                                 if (level < cur->bc_nlevels - 1)
687                                         cur->bc_levels[level + 1].ptr++;
688                                 level++;
689                                 continue;
690                         }
691
692                         /* Records in order for scrub? */
693                         xchk_btree_rec(bs);
694
695                         /* Call out to the record checker. */
696                         recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
697                                         block);
698                         error = bs->scrub_rec(bs, recp);
699                         if (error)
700                                 break;
701                         if (xchk_should_terminate(sc, &error) ||
702                             (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
703                                 break;
704
705                         cur->bc_levels[level].ptr++;
706                         continue;
707                 }
708
709                 /* End of node, pop back towards the root. */
710                 if (cur->bc_levels[level].ptr >
711                                         be16_to_cpu(block->bb_numrecs)) {
712                         xchk_btree_block_keys(bs, level, block);
713                         if (level < cur->bc_nlevels - 1)
714                                 cur->bc_levels[level + 1].ptr++;
715                         level++;
716                         continue;
717                 }
718
719                 /* Keys in order for scrub? */
720                 xchk_btree_key(bs, level);
721
722                 /* Drill another level deeper. */
723                 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
724                 if (!xchk_btree_ptr_ok(bs, level, pp)) {
725                         cur->bc_levels[level].ptr++;
726                         continue;
727                 }
728                 level--;
729                 error = xchk_btree_get_block(bs, level, pp, &block, &bp);
730                 if (error || !block)
731                         goto out;
732
733                 cur->bc_levels[level].ptr = 1;
734         }
735
736 out:
737         /* Process deferred owner checks on btree blocks. */
738         list_for_each_entry_safe(co, n, &bs->to_check, list) {
739                 if (!error && bs->cur)
740                         error = xchk_btree_check_block_owner(bs, co->level,
741                                         co->daddr);
742                 list_del(&co->list);
743                 kmem_free(co);
744         }
745         kmem_free(bs);
746
747         return error;
748 }