1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
19 #include "xfs_bmap_btree.h"
21 #include "xfs_rmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/btree.h"
27 /* Set us up with an inode's bmap. */
/*
 * Visible steps: obtain the inode, take the IO and mmap locks, optionally
 * quiesce file data for data fork scrubs of regular files, allocate a
 * transaction, then take the ILOCK.
 * NOTE(review): this listing is an extract; the return type, parameter list,
 * braces, local declarations and the error branches between the numbered
 * statements are not visible here.
 */
29 xchk_setup_inode_bmap(
/* Ask the scrub core for the inode; sc->ip is used right after this call. */
34 error = xchk_get_inode(sc);
/* Record the IO and mmap locks in sc->ilock_flags, then acquire exactly that set. */
38 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
39 xfs_ilock(sc->ip, sc->ilock_flags);
42 * We don't want any ephemeral data fork updates sitting around
43 * while we inspect block mappings, so wait for directio to finish
44 * and flush dirty data if we have delalloc reservations.
/* Only regular files being scrubbed with type XFS_SCRUB_TYPE_BMBTD take this path. */
46 if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
47 sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
48 struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
50 inode_dio_wait(VFS_I(sc->ip));
53 * Try to flush all incore state to disk before we examine the
54 * space mappings for the data fork. Leave accumulated errors
55 * in the mapping for the writer threads to consume.
57 * On ENOSPC or EIO writeback errors, we continue into the
58 * extent mapping checks because write failures do not
59 * necessarily imply anything about the correctness of the file
60 * metadata. The metadata and the file data could be on
61 * completely separate devices; a media failure might only
62 * affect a subset of the disk, etc. We can handle delalloc
63 * extents in the scrubber, so leaving them in memory is fine.
/* Start writeback, then wait for it; a line between these two calls is not shown. */
65 error = filemap_fdatawrite(mapping);
67 error = filemap_fdatawait_keep_errors(mapping);
/* True only for errors other than -ENOSPC and -EIO; the branch body is not shown. */
68 if (error && (error != -ENOSPC && error != -EIO))
72 /* Got the inode, lock it and we're ready to go. */
73 error = xchk_trans_alloc(sc, 0);
/* Add the ILOCK to the recorded lock set and acquire it. */
76 sc->ilock_flags |= XFS_ILOCK_EXCL;
77 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
80 /* scrub teardown will unlock and release the inode */
85 * Inode fork block mapping (BMBT) scrubber.
86 * More complex than the others because we have to scrub
87 * all the extents regardless of whether or not the fork
/*
 * State carried through the scrub of one inode fork.
 * NOTE(review): only lastoff is visible in this extract. Other members used
 * elsewhere in this file (sc, whichfork, is_shared, is_rt, was_loaded) must be
 * declared on lines that are not shown.
 */
91 struct xchk_bmap_info {
/*
 * File offset just past the previously scrubbed extent; the per-extent check
 * compares each new br_startoff against it to detect out-of-order records.
 */
93 xfs_fileoff_t lastoff;
100 /* Look for a corresponding rmap for this irec. */
/*
 * Derives rmap lookup flags and an offset from the fork and the bmap record,
 * then runs a less-than-or-equal rmap lookup on info->sc->sa.rmap_cur, passing
 * rmap as the record argument and has_rmap as the found indicator.
 * NOTE(review): the function name line, the agbno and owner parameters, some
 * locals and the return statements are outside this extract.
 */
103 struct xchk_bmap_info *info,
104 struct xfs_bmbt_irec *irec,
107 struct xfs_rmap_irec *rmap)
109 xfs_fileoff_t offset;
110 unsigned int rflags = 0;
/* Mirror the fork and the unwritten state of the extent into the lookup flags. */
114 if (info->whichfork == XFS_ATTR_FORK)
115 rflags |= XFS_RMAP_ATTR_FORK;
116 if (irec->br_state == XFS_EXT_UNWRITTEN)
117 rflags |= XFS_RMAP_UNWRITTEN;
120 * CoW staging extents are owned (on disk) by the refcountbt, so
121 * their rmaps do not have offsets.
/*
 * NOTE(review): the statement run for the CoW fork is not visible here; the
 * assignment below is presumably the non-CoW alternative -- confirm.
 */
123 if (info->whichfork == XFS_COW_FORK)
126 offset = irec->br_startoff;
129 * If the caller thinks this could be a shared bmbt extent (IOWs,
130 * any data fork extent of a reflink inode) then we have to use the
131 * range rmap lookup to make sure we get the correct owner/offset.
/* Both lookups take the same arguments; only the helper differs. */
133 if (info->is_shared) {
134 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
135 owner, offset, rflags, rmap, &has_rmap);
137 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
138 owner, offset, rflags, rmap, &has_rmap);
/* Let the scrub core judge the lookup error; the guarded statement is not shown. */
140 if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
/* Flag a cross-referencing corruption on this fork; the guard and remaining arguments are not shown. */
144 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
149 /* Make sure that we have rmapbt records for this extent. */
/*
 * Looks up the rmap record for the extent and compares physical range,
 * logical range (non-CoW forks only), owner and flag bits against the bmap
 * record, flagging a cross-referencing corruption on each mismatch.
 * NOTE(review): the function name line, the agbno parameter, the owner
 * declaration, early returns and trailing call arguments are not visible.
 */
152 struct xchk_bmap_info *info,
153 struct xfs_bmbt_irec *irec,
156 struct xfs_rmap_irec rmap;
157 unsigned long long rmap_end;
/* Tested: no rmap cursor, or xchk_skip_xref() says to skip; the branch body is not shown. */
160 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
/* CoW fork extents use the special CoW owner; the inode number is the other value assigned. */
163 if (info->whichfork == XFS_COW_FORK)
164 owner = XFS_RMAP_OWN_COW;
166 owner = info->sc->ip->i_ino;
168 /* Find the rmap record for this irec. */
169 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
172 /* Check the rmap. */
/*
 * The cast widens the start block before the addition. A mismatch is flagged
 * when the rmap starts after agbno or ends before the end of the extent.
 */
173 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
174 if (rmap.rm_startblock > agbno ||
175 agbno + irec->br_blockcount > rmap_end)
176 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
180 * Check the logical offsets if applicable. CoW staging extents
181 * don't track logical offsets since the mappings only exist in
/* Same containment test as above, applied to file offsets instead of blocks. */
184 if (info->whichfork != XFS_COW_FORK) {
185 rmap_end = (unsigned long long)rmap.rm_offset +
187 if (rmap.rm_offset > irec->br_startoff ||
188 irec->br_startoff + irec->br_blockcount > rmap_end)
189 xchk_fblock_xref_set_corrupt(info->sc,
190 info->whichfork, irec->br_startoff);
/* The record owner must be the owner selected above. */
193 if (rmap.rm_owner != owner)
194 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
198 * Check for discrepancies between the unwritten flag in the irec and
199 * the rmap. Note that the (in-memory) CoW fork distinguishes between
200 * unwritten and written extents, but we don't track that in the rmap
201 * records because the blocks are owned (on-disk) by the refcountbt,
202 * which doesn't track unwritten state.
/* The double negations reduce both sides to 0 or 1 before comparing. */
204 if (owner != XFS_RMAP_OWN_COW &&
205 !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
206 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
207 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
/* The attr fork flag in the rmap must agree with the fork being scrubbed. */
210 if (!!(info->whichfork == XFS_ATTR_FORK) !=
211 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
212 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
/* A record carrying the bmbt block flag is treated as a mismatch for an extent. */
214 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
215 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
219 /* Cross-reference a single rtdev extent record. */
/*
 * Hands the start block and length of the extent to
 * xchk_xref_is_used_rt_space(). The ip parameter is not used in the visible
 * lines.
 * NOTE(review): return type and braces are not visible in this extract.
 */
221 xchk_bmap_rt_iextent_xref(
222 struct xfs_inode *ip,
223 struct xchk_bmap_info *info,
224 struct xfs_bmbt_irec *irec)
226 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
227 irec->br_blockcount);
230 /* Cross-reference a single datadev extent record. */
/*
 * Splits the start block of the extent into AG number and AG block, sets up
 * per-AG scrub state, runs the space, inode chunk, rmap and sharing
 * cross-references, then releases the per-AG state.
 * NOTE(review): return type, local declarations, case labels, break
 * statements and the early exit label are not visible in this extract.
 */
232 xchk_bmap_iextent_xref(
233 struct xfs_inode *ip,
234 struct xchk_bmap_info *info,
235 struct xfs_bmbt_irec *irec)
237 struct xfs_mount *mp = info->sc->mp;
243 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
244 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
245 len = irec->br_blockcount;
/* Initialise info->sc->sa for this AG; the statement taken on failure is not shown. */
247 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
248 if (!xchk_fblock_process_error(info->sc, info->whichfork,
249 irec->br_startoff, &error))
252 xchk_xref_is_used_space(info->sc, agbno, len);
253 xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
254 xchk_bmap_xref_rmap(info, irec, agbno);
/*
 * Fork-specific sharing checks follow. NOTE(review): the case labels are
 * missing here, so which fork reaches which call cannot be read off this
 * extract.
 */
255 switch (info->whichfork) {
257 if (xfs_is_reflink_inode(info->sc->ip))
261 xchk_xref_is_not_shared(info->sc, agbno,
262 irec->br_blockcount);
265 xchk_xref_is_cow_staging(info->sc, agbno,
266 irec->br_blockcount);
/* Release what xchk_ag_init_existing() set up. */
271 xchk_ag_free(info->sc, &info->sc->sa);
275 * Directories and attr forks should never have blocks that can't be addressed
279 xchk_bmap_dirattr_extent(
/*
 * Validates the first and last file offsets of the extent with
 * xfs_verify_dablk() and flags the fork as corrupt if either fails.
 * NOTE(review): return type, the declaration of off, braces and the early
 * return are not visible in this extract.
 */
280 struct xfs_inode *ip,
281 struct xchk_bmap_info *info,
282 struct xfs_bmbt_irec *irec)
284 struct xfs_mount *mp = ip->i_mount;
/* Tested: neither a directory nor an attr fork; the branch body is not shown. */
287 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
290 if (!xfs_verify_dablk(mp, irec->br_startoff))
291 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/* off is the last block offset covered by the extent (start + count - 1). */
294 off = irec->br_startoff + irec->br_blockcount - 1;
295 if (!xfs_verify_dablk(mp, off))
296 xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
299 /* Scrub a single extent record. */
/*
 * Runs ordering, range, hole, delalloc, length, device address and unwritten
 * state checks on one extent, dispatches to a cross-reference helper, and
 * finally advances info->lastoff past this extent.
 * NOTE(review): the function name line, return statements, trailing call
 * arguments and the first halves of two conditions are not visible here.
 */
302 struct xfs_inode *ip,
303 struct xchk_bmap_info *info,
304 struct xfs_bmbt_irec *irec)
306 struct xfs_mount *mp = info->sc->mp;
310 * Check for out-of-order extents. This record could have come
311 * from the incore list, for which there is no ordering check.
313 if (irec->br_startoff < info->lastoff)
314 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/* The file offset range itself must pass xfs_verify_fileext(). */
317 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
318 xchk_fblock_set_corrupt(info->sc, info->whichfork,
321 xchk_bmap_dirattr_extent(ip, info, irec);
323 /* There should never be a "hole" extent in either extent list. */
324 if (irec->br_startblock == HOLESTARTBLOCK)
325 xchk_fblock_set_corrupt(info->sc, info->whichfork,
329 * Check for delalloc extents. We never iterate the ones in the
330 * in-core extent scan, and we should never see these in the bmbt.
332 if (isnullstartblock(irec->br_startblock))
333 xchk_fblock_set_corrupt(info->sc, info->whichfork,
336 /* Make sure the extent points to a valid place. */
337 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
338 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/*
 * NOTE(review): the opening line of each of the next two conditions is not
 * shown; presumably they select between realtime and data device checks --
 * confirm against the full file.
 */
341 !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
342 xchk_fblock_set_corrupt(info->sc, info->whichfork,
345 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
346 xchk_fblock_set_corrupt(info->sc, info->whichfork,
349 /* We don't allow unwritten extents on attr forks. */
350 if (irec->br_state == XFS_EXT_UNWRITTEN &&
351 info->whichfork == XFS_ATTR_FORK)
352 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/* Tested: corruption already flagged; the branch body is not shown. */
355 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/* One of the two helpers is called; the selecting condition is not shown. */
359 xchk_bmap_rt_iextent_xref(ip, info, irec);
361 xchk_bmap_iextent_xref(ip, info, irec);
/* Remember where this extent ends for the ordering check on the next record. */
363 info->lastoff = irec->br_startoff + irec->br_blockcount;
367 /* Scrub a bmbt record. */
/*
 * Record callback for the btree scrubber: checks the owner field of the
 * btree blocks on the cursor path, then compares the on-disk record with the
 * incore extent found at the same start offset.
 * NOTE(review): the function name line, the declarations of owner and i,
 * return statements and trailing call arguments are not visible here.
 */
370 struct xchk_btree *bs,
371 const union xfs_btree_rec *rec)
373 struct xfs_bmbt_irec irec;
374 struct xfs_bmbt_irec iext_irec;
375 struct xfs_iext_cursor icur;
376 struct xchk_bmap_info *info = bs->private;
377 struct xfs_inode *ip = bs->cur->bc_ino.ip;
378 struct xfs_buf *bp = NULL;
379 struct xfs_btree_block *block;
380 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork);
385 * Check the owners of the btree blocks up to the level below
386 * the root since the verifiers don't do that.
/*
 * Runs only when xfs_has_crc() is true and the level 0 cursor position is 1,
 * which limits the walk to once per leaf block visit (presumably the first
 * record -- confirm).
 */
388 if (xfs_has_crc(bs->cur->bc_mp) &&
389 bs->cur->bc_levels[0].ptr == 1) {
/* Levels 0 through bc_nlevels - 2; the topmost level is excluded. */
390 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
391 block = xfs_btree_get_block(bs->cur, i, &bp);
392 owner = be64_to_cpu(block->bb_u.l.bb_owner);
393 if (owner != ip->i_ino)
394 xchk_fblock_set_corrupt(bs->sc,
400 * Check that the incore extent tree contains an extent that matches
401 * this one exactly. We validate those cached bmaps later, so we don't
402 * need to check them here. If the incore extent tree was just loaded
403 * from disk by the scrubber, we assume that its contents match what's
404 * on disk (we still hold the ILOCK) and skip the equivalence check.
406 if (!info->was_loaded)
/* Decode the disk record, then require an incore extent identical in all four fields. */
409 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
410 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
412 irec.br_startoff != iext_irec.br_startoff ||
413 irec.br_startblock != iext_irec.br_startblock ||
414 irec.br_blockcount != iext_irec.br_blockcount ||
415 irec.br_state != iext_irec.br_state)
416 xchk_fblock_set_corrupt(bs->sc, info->whichfork,
421 /* Scan the btree records. */
/*
 * Loads the incore extent list if needed, then walks the bmap btree of the
 * fork with xchk_btree(), using xchk_bmapbt_rec as the per-record callback
 * and info as its private data.
 * NOTE(review): the function name line, the whichfork parameter, the error
 * declaration, the failure branch and the return are not visible here.
 */
424 struct xfs_scrub *sc,
426 struct xchk_bmap_info *info)
428 struct xfs_owner_info oinfo;
429 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
430 struct xfs_mount *mp = sc->mp;
431 struct xfs_inode *ip = sc->ip;
432 struct xfs_btree_cur *cur;
435 /* Load the incore bmap cache if it's not loaded. */
/* Record, before loading, whether the extents were already incore. */
436 info->was_loaded = !xfs_need_iread_extents(ifp);
438 error = xfs_iread_extents(sc->tp, ip, whichfork);
439 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
442 /* Check the btree structure. */
443 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
/* Build the owner info that is passed to xchk_btree() below. */
444 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
445 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
/* The cursor is torn down with the scrub result passed along. */
446 xfs_btree_del_cursor(cur, error);
/*
 * Private data handed to xchk_bmap_check_rmap() through
 * xfs_rmap_query_all().
 * NOTE(review): a whichfork member is assigned and read elsewhere in this
 * file, but its declaration is not visible in this extract.
 */
451 struct xchk_bmap_check_rmap_info {
/* Scrub context. */
452 struct xfs_scrub *sc;
/* Incore extent cursor, passed to the extent lookup and next-extent calls. */
454 struct xfs_iext_cursor icur;
457 /* Can we find bmaps that fit this rmap? */
/*
 * Callback for xfs_rmap_query_all(). Filters the rmap records down to those
 * owned by the scrubbed inode and matching the scrubbed fork, then walks the
 * incore extents to check that they line up with the rmap record.
 * NOTE(review): return type, the priv parameter, the have_map declaration,
 * the loop header, the copy into check_rec, return statements and labels
 * are not visible in this extract.
 */
459 xchk_bmap_check_rmap(
460 struct xfs_btree_cur *cur,
461 const struct xfs_rmap_irec *rec,
464 struct xfs_bmbt_irec irec;
465 struct xfs_rmap_irec check_rec;
466 struct xchk_bmap_check_rmap_info *sbcri = priv;
467 struct xfs_ifork *ifp;
468 struct xfs_scrub *sc = sbcri->sc;
471 /* Is this even the right fork? */
472 if (rec->rm_owner != sc->ip->i_ino)
/* True when exactly one of "scrubbing the attr fork" and "attr fork flag set" holds. */
474 if ((sbcri->whichfork == XFS_ATTR_FORK) ^
475 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
/* Tested: record describes a bmbt block; the branch body is not shown. */
477 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
480 /* Now look up the bmbt record. */
481 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
483 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
487 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
488 &sbcri->icur, &irec);
490 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
493 * bmap extent record lengths are constrained to 2^21 blocks in length
494 * because of space constraints in the on-disk metadata structure.
495 * However, rmap extent record lengths are constrained only by AG
496 * length, so we have to loop through the bmbt to make sure that the
497 * entire rmap is covered by bmbt records.
/*
 * check_rec tracks the part of the rmap not yet matched: each extent must
 * start at its offset and block and must not be longer than what remains.
 */
501 if (irec.br_startoff != check_rec.rm_offset)
502 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
503 check_rec.rm_offset);
504 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
505 cur->bc_ag.pag->pag_agno,
506 check_rec.rm_startblock))
507 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
508 check_rec.rm_offset);
509 if (irec.br_blockcount > check_rec.rm_blockcount)
510 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
511 check_rec.rm_offset);
512 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/* Consume this extent from the remaining rmap range. */
514 check_rec.rm_startblock += irec.br_blockcount;
515 check_rec.rm_offset += irec.br_blockcount;
516 check_rec.rm_blockcount -= irec.br_blockcount;
/* Tested: nothing left of the rmap record; the branch body is not shown. */
517 if (check_rec.rm_blockcount == 0)
519 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
521 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
522 check_rec.rm_offset);
/* Final corruption test; the value returned in this case is not shown here. */
526 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
531 /* Make sure each rmap has a corresponding bmbt entry. */
/*
 * Handles a single AG: reads the AGF, opens an rmap btree cursor, runs
 * xchk_bmap_check_rmap() over every rmap record, then deletes the cursor and
 * releases the AGF buffer.
 * NOTE(review): return type, the whichfork parameter, the agf and error
 * declarations, the assignment of sbcri.sc, error branches and the return
 * are not visible in this extract.
 */
533 xchk_bmap_check_ag_rmaps(
534 struct xfs_scrub *sc,
536 struct xfs_perag *pag)
538 struct xchk_bmap_check_rmap_info sbcri;
539 struct xfs_btree_cur *cur;
543 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
547 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
550 sbcri.whichfork = whichfork;
551 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
/* -ECANCELED is singled out here; the statement it guards is not shown. */
552 if (error == -ECANCELED)
555 xfs_btree_del_cursor(cur, error);
556 xfs_trans_brelse(sc->tp, agf);
560 /* Make sure each rmap has a corresponding bmbt entry. */
/*
 * Fork-level driver: after a set of eligibility tests it calls
 * xchk_bmap_check_ag_rmaps() for every AG.
 * NOTE(review): return type, the whichfork parameter, local declarations,
 * the bodies of the eligibility tests, the non data fork zero_size
 * assignment and the loop exit handling are not visible in this extract.
 */
562 xchk_bmap_check_rmaps(
563 struct xfs_scrub *sc,
566 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
567 struct xfs_perag *pag;
/* Tested: no rmap btree, CoW fork, or corruption already flagged. */
572 if (!xfs_has_rmapbt(sc->mp) ||
573 whichfork == XFS_COW_FORK ||
574 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
577 /* Don't support realtime rmap checks yet. */
578 if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
581 ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
584 * Only do this for complex maps that are in btree format, or for
585 * situations where we would seem to have a size but zero extents.
586 * The inode repair code can zap broken iforks, which means we have
587 * to flag this bmap as corrupt if there are rmaps that need to be
/* For the data fork, zero_size records whether the VFS inode size is zero. */
591 if (whichfork == XFS_DATA_FORK)
592 zero_size = i_size_read(VFS_I(sc->ip)) == 0;
/* Tested: non-btree fork that is either zero-sized or already has extents. */
596 if (ifp->if_format != XFS_DINODE_FMT_BTREE &&
597 (zero_size || ifp->if_nextents > 0))
600 for_each_perag(sc->mp, agno, pag) {
601 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
/* Corruption is tested after each AG; the statement it guards is not shown. */
604 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
613 * Scrub an inode fork's block mappings.
615 * First we scan every record in every btree block, if applicable.
616 * Then we unconditionally scan the incore extent cache.
620 struct xfs_scrub *sc,
/*
 * Main per-fork scrubber: fills in the info state, sanity checks the fork
 * against inode and filesystem features, scrubs the btree when the fork is in
 * btree format, then scrubs each incore extent and finally cross-checks the
 * rmap records.
 * NOTE(review): the function name line, the whichfork parameter, the error
 * declaration, case labels, goto and break statements, the assignment of
 * info.sc, the reset of info.lastoff and the return are not visible in this
 * extract.
 */
623 struct xfs_bmbt_irec irec;
624 struct xchk_bmap_info info = { NULL };
625 struct xfs_mount *mp = sc->mp;
626 struct xfs_inode *ip = sc->ip;
627 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
628 xfs_fileoff_t endoff;
629 struct xfs_iext_cursor icur;
632 /* Non-existent forks can be ignored. */
/* Realtime and shared handling apply to the data fork only. */
636 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
637 info.whichfork = whichfork;
638 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
643 /* No CoW forks on non-reflink inodes/filesystems. */
644 if (!xfs_is_reflink_inode(ip)) {
645 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
/* The inode is flagged corrupt when the filesystem has neither attr feature. */
650 if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
651 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
654 ASSERT(whichfork == XFS_DATA_FORK);
658 /* Check the fork values */
659 switch (ifp->if_format) {
660 case XFS_DINODE_FMT_UUID:
661 case XFS_DINODE_FMT_DEV:
662 case XFS_DINODE_FMT_LOCAL:
663 /* No mappings to check. */
665 case XFS_DINODE_FMT_EXTENTS:
667 case XFS_DINODE_FMT_BTREE:
/* A CoW fork in btree format is flagged as corrupt. */
668 if (whichfork == XFS_COW_FORK) {
669 xchk_fblock_set_corrupt(sc, whichfork, 0);
673 error = xchk_bmap_btree(sc, whichfork, &info);
/* NOTE(review): presumably the default case for unknown formats -- label not shown. */
678 xchk_fblock_set_corrupt(sc, whichfork, 0);
682 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
685 /* Find the offset of the last extent in the mapping. */
686 error = xfs_bmap_last_offset(ip, &endoff, whichfork);
687 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
690 /* Scrub extent records. */
692 ifp = xfs_ifork_ptr(ip, whichfork);
693 for_each_xfs_iext(ifp, &icur, &irec) {
/* Termination or corruption is tested on every iteration; the body is not shown. */
694 if (xchk_should_terminate(sc, &error) ||
695 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
/* Delalloc extents are tested for here; the statement taken is not shown. */
697 if (isnullstartblock(irec.br_startblock))
/* No extent may start at or beyond the end offset computed above. */
699 if (irec.br_startoff >= endoff) {
700 xchk_fblock_set_corrupt(sc, whichfork,
704 error = xchk_bmap_iextent(ip, &info, &irec);
/* Last step: make sure rmap records have matching bmap extents. */
709 error = xchk_bmap_check_rmaps(sc, whichfork);
710 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
716 /* Scrub an inode's data fork. */
/*
 * Thin wrapper that returns the result of xchk_bmap() for XFS_DATA_FORK.
 * NOTE(review): the function name line is not visible in this extract.
 */
719 struct xfs_scrub *sc)
721 return xchk_bmap(sc, XFS_DATA_FORK);
724 /* Scrub an inode's attr fork. */
/*
 * Thin wrapper that returns the result of xchk_bmap() for XFS_ATTR_FORK.
 * NOTE(review): the function name line is not visible in this extract.
 */
727 struct xfs_scrub *sc)
729 return xchk_bmap(sc, XFS_ATTR_FORK);
732 /* Scrub an inode's CoW fork. */
/*
 * Returns the result of xchk_bmap() for XFS_COW_FORK, after a reflink test.
 * NOTE(review): the function name line and the statement taken for
 * non-reflink inodes are not visible in this extract.
 */
735 struct xfs_scrub *sc)
737 if (!xfs_is_reflink_inode(sc->ip))
740 return xchk_bmap(sc, XFS_COW_FORK);