fs/xfs/libxfs/xfs_inode_buf.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_mount.h"
  13 #include "xfs_inode.h"
  14 #include "xfs_errortag.h"
  15 #include "xfs_error.h"
  16 #include "xfs_icache.h"
  17 #include "xfs_trans.h"
  18 #include "xfs_ialloc.h"
  19 #include "xfs_dir2.h"
  20
  21 #include <linux/iversion.h>
  22
  23 /*
  24  * If we are doing readahead on an inode buffer, we might be in log recovery
  25  * reading an inode allocation buffer that hasn't yet been replayed, and hence
  26  * has not had the inode cores stamped into it. Hence for readahead, the buffer
  27  * may be potentially invalid.
  28  *
  29  * If the readahead buffer is invalid, we need to mark it with an error and
  30  * clear the DONE status of the buffer so that a followup read will re-read it
  31  * from disk. We don't report the error otherwise to avoid warnings during log
  32  * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
  33  * because all we want to do is say readahead failed; there is no-one to report
  34  * the error to, so this will distinguish it from a non-ra verifier failure.
  35  * Changes to this readahead error behaviour also need to be reflected in
  36  * xfs_dquot_buf_readahead_verify().
  37  */
  38 static void
  39 xfs_inode_buf_verify(
  40         struct xfs_buf  *bp,
  41         bool            readahead)
  42 {
  43         struct xfs_mount *mp = bp->b_mount;
  44         xfs_agnumber_t  agno;
  45         int             i;
  46         int             ni;
  47
  48         /*
  49          * Validate the magic number and version of every inode in the buffer
  50          */
  51         agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
  52         ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  53         for (i = 0; i < ni; i++) {
  54                 int             di_ok;
  55                 xfs_dinode_t    *dip;
  56                 xfs_agino_t     unlinked_ino;
  57
  58                 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  59                 unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  60                 di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
  61                         xfs_dinode_good_version(mp, dip->di_version) &&
  62                         xfs_verify_agino_or_null(mp, agno, unlinked_ino);
  63                 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  64                                                 XFS_ERRTAG_ITOBP_INOTOBP))) {
  65                         if (readahead) {
  66                                 bp->b_flags &= ~XBF_DONE;
  67                                 xfs_buf_ioerror(bp, -EIO);
  68                                 return;
  69                         }
  70
  71 #ifdef DEBUG
  72                         xfs_alert(mp,
  73                                 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
  74                                 (unsigned long long)xfs_buf_daddr(bp), i,
  75                                 be16_to_cpu(dip->di_magic));
  76 #endif
  77                         xfs_buf_verifier_error(bp, -EFSCORRUPTED,
  78                                         __func__, dip, sizeof(*dip),
  79                                         NULL);
  80                         return;
  81                 }
  82         }
  83 }
  84
  85
  86 static void
  87 xfs_inode_buf_read_verify(
  88         struct xfs_buf  *bp)
  89 {
  90         xfs_inode_buf_verify(bp, false);
  91 }
  92
  93 static void
  94 xfs_inode_buf_readahead_verify(
  95         struct xfs_buf  *bp)
  96 {
  97         xfs_inode_buf_verify(bp, true);
  98 }
  99
 100 static void
 101 xfs_inode_buf_write_verify(
 102         struct xfs_buf  *bp)
 103 {
 104         xfs_inode_buf_verify(bp, false);
 105 }
 106
 107 const struct xfs_buf_ops xfs_inode_buf_ops = {
 108         .name = "xfs_inode",
 109         .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 110                      cpu_to_be16(XFS_DINODE_MAGIC) },
 111         .verify_read = xfs_inode_buf_read_verify,
 112         .verify_write = xfs_inode_buf_write_verify,
 113 };
 114
 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 116         .name = "xfs_inode_ra",
 117         .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 118                      cpu_to_be16(XFS_DINODE_MAGIC) },
 119         .verify_read = xfs_inode_buf_readahead_verify,
 120         .verify_write = xfs_inode_buf_write_verify,
 121 };
 122
 123
 124 /*
 125  * This routine is called to map an inode to the buffer containing the on-disk
 126  * version of the inode.  It returns a pointer to the buffer containing the
 127  * on-disk inode in the bpp parameter.
 128  */
 129 int
 130 xfs_imap_to_bp(
 131         struct xfs_mount        *mp,
 132         struct xfs_trans        *tp,
 133         struct xfs_imap         *imap,
 134         struct xfs_buf          **bpp)
 135 {
 136         return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 137                                    imap->im_len, XBF_UNMAPPED, bpp,
 138                                    &xfs_inode_buf_ops);
 139 }
 140
 141 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
 142 {
 143         struct timespec64       tv;
 144         uint32_t                n;
 145
 146         tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
 147         tv.tv_nsec = n;
 148
 149         return tv;
 150 }
 151
 152 /* Convert an ondisk timestamp to an incore timestamp. */
 153 struct timespec64
 154 xfs_inode_from_disk_ts(
 155         struct xfs_dinode               *dip,
 156         const xfs_timestamp_t           ts)
 157 {
 158         struct timespec64               tv;
 159         struct xfs_legacy_timestamp     *lts;
 160
 161         if (xfs_dinode_has_bigtime(dip))
 162                 return xfs_inode_decode_bigtime(be64_to_cpu(ts));
 163
 164         lts = (struct xfs_legacy_timestamp *)&ts;
 165         tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
 166         tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
 167
 168         return tv;
 169 }
 170
 171 int
 172 xfs_inode_from_disk(
 173         struct xfs_inode        *ip,
 174         struct xfs_dinode       *from)
 175 {
 176         struct inode            *inode = VFS_I(ip);
 177         int                     error;
 178         xfs_failaddr_t          fa;
 179
 180         ASSERT(ip->i_cowfp == NULL);
 181         ASSERT(ip->i_afp == NULL);
 182
 183         fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
 184         if (fa) {
 185                 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
 186                                 sizeof(*from), fa);
 187                 return -EFSCORRUPTED;
 188         }
 189
 190         /*
 191          * First get the permanent information that is needed to allocate an
 192          * inode. If the inode is unused, mode is zero and we shouldn't mess
 193          * with the uninitialized part of it.
 194          */
 195         if (!xfs_has_v3inodes(ip->i_mount))
 196                 ip->i_flushiter = be16_to_cpu(from->di_flushiter);
 197         inode->i_generation = be32_to_cpu(from->di_gen);
 198         inode->i_mode = be16_to_cpu(from->di_mode);
 199         if (!inode->i_mode)
 200                 return 0;
 201
 202         /*
 203          * Convert v1 inodes immediately to v2 inode format as this is the
 204          * minimum inode version format we support in the rest of the code.
 205          * They will also be unconditionally written back to disk as v2 inodes.
 206          */
 207         if (unlikely(from->di_version == 1)) {
 208                 set_nlink(inode, be16_to_cpu(from->di_onlink));
 209                 ip->i_projid = 0;
 210         } else {
 211                 set_nlink(inode, be32_to_cpu(from->di_nlink));
 212                 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
 213                                         be16_to_cpu(from->di_projid_lo);
 214         }
 215
 216         i_uid_write(inode, be32_to_cpu(from->di_uid));
 217         i_gid_write(inode, be32_to_cpu(from->di_gid));
 218
 219         /*
 220          * Time is signed, so need to convert to signed 32 bit before
 221          * storing in inode timestamp which may be 64 bit. Otherwise
 222          * a time before epoch is converted to a time long after epoch
 223          * on 64 bit systems.
 224          */
 225         inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
 226         inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
 227         inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
 228
 229         ip->i_disk_size = be64_to_cpu(from->di_size);
 230         ip->i_nblocks = be64_to_cpu(from->di_nblocks);
 231         ip->i_extsize = be32_to_cpu(from->di_extsize);
 232         ip->i_forkoff = from->di_forkoff;
 233         ip->i_diflags   = be16_to_cpu(from->di_flags);
 234
 235         if (from->di_dmevmask || from->di_dmstate)
 236                 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
 237
 238         if (xfs_has_v3inodes(ip->i_mount)) {
 239                 inode_set_iversion_queried(inode,
 240                                            be64_to_cpu(from->di_changecount));
 241                 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
 242                 ip->i_diflags2 = be64_to_cpu(from->di_flags2);
 243                 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
 244         }
 245
 246         error = xfs_iformat_data_fork(ip, from);
 247         if (error)
 248                 return error;
 249         if (from->di_forkoff) {
 250                 error = xfs_iformat_attr_fork(ip, from);
 251                 if (error)
 252                         goto out_destroy_data_fork;
 253         }
 254         if (xfs_is_reflink_inode(ip))
 255                 xfs_ifork_init_cow(ip);
 256         return 0;
 257
 258 out_destroy_data_fork:
 259         xfs_idestroy_fork(&ip->i_df);
 260         return error;
 261 }
 262
 263 /* Convert an incore timestamp to an ondisk timestamp. */
 264 static inline xfs_timestamp_t
 265 xfs_inode_to_disk_ts(
 266         struct xfs_inode                *ip,
 267         const struct timespec64         tv)
 268 {
 269         struct xfs_legacy_timestamp     *lts;
 270         xfs_timestamp_t                 ts;
 271
 272         if (xfs_inode_has_bigtime(ip))
 273                 return cpu_to_be64(xfs_inode_encode_bigtime(tv));
 274
 275         lts = (struct xfs_legacy_timestamp *)&ts;
 276         lts->t_sec = cpu_to_be32(tv.tv_sec);
 277         lts->t_nsec = cpu_to_be32(tv.tv_nsec);
 278
 279         return ts;
 280 }
 281
 282 void
 283 xfs_inode_to_disk(
 284         struct xfs_inode        *ip,
 285         struct xfs_dinode       *to,
 286         xfs_lsn_t               lsn)
 287 {
 288         struct inode            *inode = VFS_I(ip);
 289
 290         to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 291         to->di_onlink = 0;
 292
 293         to->di_format = xfs_ifork_format(&ip->i_df);
 294         to->di_uid = cpu_to_be32(i_uid_read(inode));
 295         to->di_gid = cpu_to_be32(i_gid_read(inode));
 296         to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
 297         to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
 298
 299         memset(to->di_pad, 0, sizeof(to->di_pad));
 300         to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
 301         to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
 302         to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
 303         to->di_nlink = cpu_to_be32(inode->i_nlink);
 304         to->di_gen = cpu_to_be32(inode->i_generation);
 305         to->di_mode = cpu_to_be16(inode->i_mode);
 306
 307         to->di_size = cpu_to_be64(ip->i_disk_size);
 308         to->di_nblocks = cpu_to_be64(ip->i_nblocks);
 309         to->di_extsize = cpu_to_be32(ip->i_extsize);
 310         to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
 311         to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
 312         to->di_forkoff = ip->i_forkoff;
 313         to->di_aformat = xfs_ifork_format(ip->i_afp);
 314         to->di_flags = cpu_to_be16(ip->i_diflags);
 315
 316         if (xfs_has_v3inodes(ip->i_mount)) {
 317                 to->di_version = 3;
 318                 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 319                 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
 320                 to->di_flags2 = cpu_to_be64(ip->i_diflags2);
 321                 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
 322                 to->di_ino = cpu_to_be64(ip->i_ino);
 323                 to->di_lsn = cpu_to_be64(lsn);
 324                 memset(to->di_pad2, 0, sizeof(to->di_pad2));
 325                 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 326                 to->di_flushiter = 0;
 327         } else {
 328                 to->di_version = 2;
 329                 to->di_flushiter = cpu_to_be16(ip->i_flushiter);
 330         }
 331 }
 332
 333 static xfs_failaddr_t
 334 xfs_dinode_verify_fork(
 335         struct xfs_dinode       *dip,
 336         struct xfs_mount        *mp,
 337         int                     whichfork)
 338 {
 339         uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 340         mode_t                  mode = be16_to_cpu(dip->di_mode);
 341         uint32_t                fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
 342         uint32_t                fork_format = XFS_DFORK_FORMAT(dip, whichfork);
 343
 344         /*
 345          * For fork types that can contain local data, check that the fork
 346          * format matches the size of local data contained within the fork.
 347          *
 348          * For all types, check that when the size says the should be in extent
 349          * or btree format, the inode isn't claiming it is in local format.
 350          */
 351         if (whichfork == XFS_DATA_FORK) {
 352                 if (S_ISDIR(mode) || S_ISLNK(mode)) {
 353                         if (be64_to_cpu(dip->di_size) <= fork_size &&
 354                             fork_format != XFS_DINODE_FMT_LOCAL)
 355                                 return __this_address;
 356                 }
 357
 358                 if (be64_to_cpu(dip->di_size) > fork_size &&
 359                     fork_format == XFS_DINODE_FMT_LOCAL)
 360                         return __this_address;
 361         }
 362
 363         switch (fork_format) {
 364         case XFS_DINODE_FMT_LOCAL:
 365                 /*
 366                  * No local regular files yet.
 367                  */
 368                 if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
 369                         return __this_address;
 370                 if (di_nextents)
 371                         return __this_address;
 372                 break;
 373         case XFS_DINODE_FMT_EXTENTS:
 374                 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 375                         return __this_address;
 376                 break;
 377         case XFS_DINODE_FMT_BTREE:
 378                 if (whichfork == XFS_ATTR_FORK) {
 379                         if (di_nextents > MAXAEXTNUM)
 380                                 return __this_address;
 381                 } else if (di_nextents > MAXEXTNUM) {
 382                         return __this_address;
 383                 }
 384                 break;
 385         default:
 386                 return __this_address;
 387         }
 388         return NULL;
 389 }
 390
 391 static xfs_failaddr_t
 392 xfs_dinode_verify_forkoff(
 393         struct xfs_dinode       *dip,
 394         struct xfs_mount        *mp)
 395 {
 396         if (!dip->di_forkoff)
 397                 return NULL;
 398
 399         switch (dip->di_format)  {
 400         case XFS_DINODE_FMT_DEV:
 401                 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 402                         return __this_address;
 403                 break;
 404         case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 405         case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 406         case XFS_DINODE_FMT_BTREE:
 407                 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
 408                         return __this_address;
 409                 break;
 410         default:
 411                 return __this_address;
 412         }
 413         return NULL;
 414 }
 415
 416 xfs_failaddr_t
 417 xfs_dinode_verify(
 418         struct xfs_mount        *mp,
 419         xfs_ino_t               ino,
 420         struct xfs_dinode       *dip)
 421 {
 422         xfs_failaddr_t          fa;
 423         uint16_t                mode;
 424         uint16_t                flags;
 425         uint64_t                flags2;
 426         uint64_t                di_size;
 427
 428         if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 429                 return __this_address;
 430
 431         /* Verify v3 integrity information first */
 432         if (dip->di_version >= 3) {
 433                 if (!xfs_has_v3inodes(mp))
 434                         return __this_address;
 435                 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 436                                       XFS_DINODE_CRC_OFF))
 437                         return __this_address;
 438                 if (be64_to_cpu(dip->di_ino) != ino)
 439                         return __this_address;
 440                 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 441                         return __this_address;
 442         }
 443
 444         /* don't allow invalid i_size */
 445         di_size = be64_to_cpu(dip->di_size);
 446         if (di_size & (1ULL << 63))
 447                 return __this_address;
 448
 449         mode = be16_to_cpu(dip->di_mode);
 450         if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 451                 return __this_address;
 452
 453         /* No zero-length symlinks/dirs. */
 454         if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 455                 return __this_address;
 456
 457         /* Fork checks carried over from xfs_iformat_fork */
 458         if (mode &&
 459             be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 460                         be64_to_cpu(dip->di_nblocks))
 461                 return __this_address;
 462
 463         if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 464                 return __this_address;
 465
 466         flags = be16_to_cpu(dip->di_flags);
 467
 468         if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 469                 return __this_address;
 470
 471         /* check for illegal values of forkoff */
 472         fa = xfs_dinode_verify_forkoff(dip, mp);
 473         if (fa)
 474                 return fa;
 475
 476         /* Do we have appropriate data fork formats for the mode? */
 477         switch (mode & S_IFMT) {
 478         case S_IFIFO:
 479         case S_IFCHR:
 480         case S_IFBLK:
 481         case S_IFSOCK:
 482                 if (dip->di_format != XFS_DINODE_FMT_DEV)
 483                         return __this_address;
 484                 break;
 485         case S_IFREG:
 486         case S_IFLNK:
 487         case S_IFDIR:
 488                 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 489                 if (fa)
 490                         return fa;
 491                 break;
 492         case 0:
 493                 /* Uninitialized inode ok. */
 494                 break;
 495         default:
 496                 return __this_address;
 497         }
 498
 499         if (dip->di_forkoff) {
 500                 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 501                 if (fa)
 502                         return fa;
 503         } else {
 504                 /*
 505                  * If there is no fork offset, this may be a freshly-made inode
 506                  * in a new disk cluster, in which case di_aformat is zeroed.
 507                  * Otherwise, such an inode must be in EXTENTS format; this goes
 508                  * for freed inodes as well.
 509                  */
 510                 switch (dip->di_aformat) {
 511                 case 0:
 512                 case XFS_DINODE_FMT_EXTENTS:
 513                         break;
 514                 default:
 515                         return __this_address;
 516                 }
 517                 if (dip->di_anextents)
 518                         return __this_address;
 519         }
 520
 521         /* extent size hint validation */
 522         fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 523                         mode, flags);
 524         if (fa)
 525                 return fa;
 526
 527         /* only version 3 or greater inodes are extensively verified here */
 528         if (dip->di_version < 3)
 529                 return NULL;
 530
 531         flags2 = be64_to_cpu(dip->di_flags2);
 532
 533         /* don't allow reflink/cowextsize if we don't have reflink */
 534         if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 535              !xfs_has_reflink(mp))
 536                 return __this_address;
 537
 538         /* only regular files get reflink */
 539         if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 540                 return __this_address;
 541
 542         /* don't let reflink and realtime mix */
 543         if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 544                 return __this_address;
 545
 546         /* COW extent size hint validation */
 547         fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 548                         mode, flags, flags2);
 549         if (fa)
 550                 return fa;
 551
 552         /* bigtime iflag can only happen on bigtime filesystems */
 553         if (xfs_dinode_has_bigtime(dip) &&
 554             !xfs_has_bigtime(mp))
 555                 return __this_address;
 556
 557         return NULL;
 558 }
 559
 560 void
 561 xfs_dinode_calc_crc(
 562         struct xfs_mount        *mp,
 563         struct xfs_dinode       *dip)
 564 {
 565         uint32_t                crc;
 566
 567         if (dip->di_version < 3)
 568                 return;
 569
 570         ASSERT(xfs_has_crc(mp));
 571         crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 572                               XFS_DINODE_CRC_OFF);
 573         dip->di_crc = xfs_end_cksum(crc);
 574 }
 575
 576 /*
 577  * Validate di_extsize hint.
 578  *
 579  * 1. Extent size hint is only valid for directories and regular files.
 580  * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 581  * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 582  * 4. Hint cannot be larger than MAXTEXTLEN.
 583  * 5. Can be changed on directories at any time.
 584  * 6. Hint value of 0 turns off hints, clears inode flags.
 585  * 7. Extent size must be a multiple of the appropriate block size.
 586  *    For realtime files, this is the rt extent size.
 587  * 8. For non-realtime files, the extent size hint must be limited
 588  *    to half the AG size to avoid alignment extending the extent beyond the
 589  *    limits of the AG.
 590  */
 591 xfs_failaddr_t
 592 xfs_inode_validate_extsize(
 593         struct xfs_mount                *mp,
 594         uint32_t                        extsize,
 595         uint16_t                        mode,
 596         uint16_t                        flags)
 597 {
 598         bool                            rt_flag;
 599         bool                            hint_flag;
 600         bool                            inherit_flag;
 601         uint32_t                        extsize_bytes;
 602         uint32_t                        blocksize_bytes;
 603
 604         rt_flag = (flags & XFS_DIFLAG_REALTIME);
 605         hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 606         inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 607         extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 608
 609         /*
 610          * This comment describes a historic gap in this verifier function.
 611          *
 612          * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
 613          * function has never checked that the extent size hint is an integer
 614          * multiple of the realtime extent size.  Since we allow users to set
 615          * this combination  on non-rt filesystems /and/ to change the rt
 616          * extent size when adding a rt device to a filesystem, the net effect
 617          * is that users can configure a filesystem anticipating one rt
 618          * geometry and change their minds later.  Directories do not use the
 619          * extent size hint, so this is harmless for them.
 620          *
 621          * If a directory with a misaligned extent size hint is allowed to
 622          * propagate that hint into a new regular realtime file, the result
 623          * is that the inode cluster buffer verifier will trigger a corruption
 624          * shutdown the next time it is run, because the verifier has always
 625          * enforced the alignment rule for regular files.
 626          *
 627          * Because we allow administrators to set a new rt extent size when
 628          * adding a rt section, we cannot add a check to this verifier because
 629          * that will result a new source of directory corruption errors when
 630          * reading an existing filesystem.  Instead, we rely on callers to
 631          * decide when alignment checks are appropriate, and fix things up as
 632          * needed.
 633          */
 634
 635         if (rt_flag)
 636                 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
 637         else
 638                 blocksize_bytes = mp->m_sb.sb_blocksize;
 639
 640         if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 641                 return __this_address;
 642
 643         if (hint_flag && !S_ISREG(mode))
 644                 return __this_address;
 645
 646         if (inherit_flag && !S_ISDIR(mode))
 647                 return __this_address;
 648
 649         if ((hint_flag || inherit_flag) && extsize == 0)
 650                 return __this_address;
 651
 652         /* free inodes get flags set to zero but extsize remains */
 653         if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 654                 return __this_address;
 655
 656         if (extsize_bytes % blocksize_bytes)
 657                 return __this_address;
 658
 659         if (extsize > MAXEXTLEN)
 660                 return __this_address;
 661
 662         if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 663                 return __this_address;
 664
 665         return NULL;
 666 }
 667
 668 /*
 669  * Validate di_cowextsize hint.
 670  *
 671  * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
 672  *    The inode does not have to have any shared blocks, but it must be a v3.
 673  * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
 674  *    for a directory, the hint is propagated to new files.
 675  * 3. Can be changed on files & directories at any time.
 676  * 4. Hint value of 0 turns off hints, clears inode flags.
 677  * 5. Extent size must be a multiple of the appropriate block size.
 678  * 6. The extent size hint must be limited to half the AG size to avoid
 679  *    alignment extending the extent beyond the limits of the AG.
 680  */
 681 xfs_failaddr_t
 682 xfs_inode_validate_cowextsize(
 683         struct xfs_mount                *mp,
 684         uint32_t                        cowextsize,
 685         uint16_t                        mode,
 686         uint16_t                        flags,
 687         uint64_t                        flags2)
 688 {
 689         bool                            rt_flag;
 690         bool                            hint_flag;
 691         uint32_t                        cowextsize_bytes;
 692
 693         rt_flag = (flags & XFS_DIFLAG_REALTIME);
 694         hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 695         cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 696
 697         if (hint_flag && !xfs_has_reflink(mp))
 698                 return __this_address;
 699
 700         if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 701                 return __this_address;
 702
 703         if (hint_flag && cowextsize == 0)
 704                 return __this_address;
 705
 706         /* free inodes get flags set to zero but cowextsize remains */
 707         if (mode && !hint_flag && cowextsize != 0)
 708                 return __this_address;
 709
 710         if (hint_flag && rt_flag)
 711                 return __this_address;
 712
 713         if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 714                 return __this_address;
 715
 716         if (cowextsize > MAXEXTLEN)
 717                 return __this_address;
 718
 719         if (cowextsize > mp->m_sb.sb_agblocks / 2)
 720                 return __this_address;
 721
 722         return NULL;
 723 }