libxfs/xfs_trans_resv.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
   4  * Copyright (C) 2010 Red Hat, Inc.
   5  * All Rights Reserved.
   6  */
   7 #include "xfs.h"
   8 #include "xfs_fs.h"
   9 #include "xfs_shared.h"
  10 #include "xfs_format.h"
  11 #include "xfs_log_format.h"
  12 #include "xfs_trans_resv.h"
  13 #include "xfs_mount.h"
  14 #include "xfs_da_format.h"
  15 #include "xfs_da_btree.h"
  16 #include "xfs_inode.h"
  17 #include "xfs_bmap_btree.h"
  18 #include "xfs_quota.h"
  19 #include "xfs_trans.h"
  20 #include "xfs_qm.h"
  21 #include "xfs_trans_space.h"
  22 #include "xfs_rtbitmap.h"
  23
  24 #define _ALLOC  true
  25 #define _FREE   false
  26
  27 /*
  28  * A buffer has a format structure overhead in the log in addition
  29  * to the data, so we need to take this into account when reserving
  30  * space in a transaction for a buffer.  Round the space required up
  31  * to a multiple of 128 bytes so that we don't change the historical
  32  * reservation that has been used for this overhead.
  33  */
  34 STATIC uint
  35 xfs_buf_log_overhead(void)
  36 {
  37         return round_up(sizeof(struct xlog_op_header) +
  38                         sizeof(struct xfs_buf_log_format), 128);
  39 }
  40
  41 /*
  42  * Calculate out transaction log reservation per item in bytes.
  43  *
  44  * The nbufs argument is used to indicate the number of items that
  45  * will be changed in a transaction.  size is used to tell how many
  46  * bytes should be reserved per item.
  47  */
  48 STATIC uint
  49 xfs_calc_buf_res(
  50         uint            nbufs,
  51         uint            size)
  52 {
  53         return nbufs * (size + xfs_buf_log_overhead());
  54 }
  55
  56 /*
  57  * Per-extent log reservation for the btree changes involved in freeing or
  58  * allocating an extent.  In classic XFS there were two trees that will be
  59  * modified (bnobt + cntbt).  With rmap enabled, there are three trees
  60  * (rmapbt).  The number of blocks reserved is based on the formula:
  61  *
  62  * num trees * ((2 blocks/level * max depth) - 1)
  63  *
  64  * Keep in mind that max depth is calculated separately for each type of tree.
  65  */
  66 uint
  67 xfs_allocfree_block_count(
  68         struct xfs_mount *mp,
  69         uint            num_ops)
  70 {
  71         uint            blocks;
  72
  73         blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
  74         if (xfs_has_rmapbt(mp))
  75                 blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
  76
  77         return blocks;
  78 }
  79
  80 /*
  81  * Per-extent log reservation for refcount btree changes.  These are never done
  82  * in the same transaction as an allocation or a free, so we compute them
  83  * separately.
  84  */
  85 static unsigned int
  86 xfs_refcountbt_block_count(
  87         struct xfs_mount        *mp,
  88         unsigned int            num_ops)
  89 {
  90         return num_ops * (2 * mp->m_refc_maxlevels - 1);
  91 }
  92
  93 /*
  94  * Logging inodes is really tricksy. They are logged in memory format,
  95  * which means that what we write into the log doesn't directly translate into
  96  * the amount of space they use on disk.
  97  *
  98  * Case in point - btree format forks in memory format use more space than the
  99  * on-disk format. In memory, the buffer contains a normal btree block header so
 100  * the btree code can treat it as though it is just another generic buffer.
 101  * However, when we write it to the inode fork, we don't write all of this
 102  * header as it isn't needed. e.g. the root is only ever in the inode, so
 103  * there's no need for sibling pointers which would waste 16 bytes of space.
 104  *
 105  * Hence when we have an inode with a maximally sized btree format fork, then
 106  * amount of information we actually log is greater than the size of the inode
 107  * on disk. Hence we need an inode reservation function that calculates all this
 108  * correctly. So, we log:
 109  *
 110  * - 4 log op headers for object
 111  *      - for the ilf, the inode core and 2 forks
 112  * - inode log format object
 113  * - the inode core
 114  * - two inode forks containing bmap btree root blocks.
 115  *      - the btree data contained by both forks will fit into the inode size,
 116  *        hence when combined with the inode core above, we have a total of the
 117  *        actual inode size.
 118  *      - the BMBT headers need to be accounted separately, as they are
 119  *        additional to the records and pointers that fit inside the inode
 120  *        forks.
 121  */
 122 STATIC uint
 123 xfs_calc_inode_res(
 124         struct xfs_mount        *mp,
 125         uint                    ninodes)
 126 {
 127         return ninodes *
 128                 (4 * sizeof(struct xlog_op_header) +
 129                  sizeof(struct xfs_inode_log_format) +
 130                  mp->m_sb.sb_inodesize +
 131                  2 * XFS_BMBT_BLOCK_LEN(mp));
 132 }
 133
 134 /*
 135  * Inode btree record insertion/removal modifies the inode btree and free space
 136  * btrees (since the inobt does not use the agfl). This requires the following
 137  * reservation:
 138  *
 139  * the inode btree: max depth * blocksize
 140  * the allocation btrees: 2 trees * (max depth - 1) * block size
 141  *
 142  * The caller must account for SB and AG header modifications, etc.
 143  */
 144 STATIC uint
 145 xfs_calc_inobt_res(
 146         struct xfs_mount        *mp)
 147 {
 148         return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
 149                         XFS_FSB_TO_B(mp, 1)) +
 150                                 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 151                         XFS_FSB_TO_B(mp, 1));
 152 }
 153
 154 /*
 155  * The free inode btree is a conditional feature. The behavior differs slightly
 156  * from that of the traditional inode btree in that the finobt tracks records
 157  * for inode chunks with at least one free inode. A record can be removed from
 158  * the tree during individual inode allocation. Therefore the finobt
 159  * reservation is unconditional for both the inode chunk allocation and
 160  * individual inode allocation (modify) cases.
 161  *
 162  * Behavior aside, the reservation for finobt modification is equivalent to the
 163  * traditional inobt: cover a full finobt shape change plus block allocation.
 164  */
 165 STATIC uint
 166 xfs_calc_finobt_res(
 167         struct xfs_mount        *mp)
 168 {
 169         if (!xfs_has_finobt(mp))
 170                 return 0;
 171
 172         return xfs_calc_inobt_res(mp);
 173 }
 174
 175 /*
 176  * Calculate the reservation required to allocate or free an inode chunk. This
 177  * includes:
 178  *
 179  * the allocation btrees: 2 trees * (max depth - 1) * block size
 180  * the inode chunk: m_ino_geo.ialloc_blks * N
 181  *
 182  * The size N of the inode chunk reservation depends on whether it is for
 183  * allocation or free and which type of create transaction is in use. An inode
 184  * chunk free always invalidates the buffers and only requires reservation for
 185  * headers (N == 0). An inode chunk allocation requires a chunk sized
 186  * reservation on v4 and older superblocks to initialize the chunk. No chunk
 187  * reservation is required for allocation on v5 supers, which use ordered
 188  * buffers to initialize.
 189  */
 190 STATIC uint
 191 xfs_calc_inode_chunk_res(
 192         struct xfs_mount        *mp,
 193         bool                    alloc)
 194 {
 195         uint                    res, size = 0;
 196
 197         res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 198                                XFS_FSB_TO_B(mp, 1));
 199         if (alloc) {
 200                 /* icreate tx uses ordered buffers */
 201                 if (xfs_has_v3inodes(mp))
 202                         return res;
 203                 size = XFS_FSB_TO_B(mp, 1);
 204         }
 205
 206         res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
 207         return res;
 208 }
 209
 210 /*
 211  * Per-extent log reservation for the btree changes involved in freeing or
 212  * allocating a realtime extent.  We have to be able to log as many rtbitmap
 213  * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
 214  * extents, as well as the realtime summary block.
 215  */
 216 static unsigned int
 217 xfs_rtalloc_block_count(
 218         struct xfs_mount        *mp,
 219         unsigned int            num_ops)
 220 {
 221         unsigned int            rtbmp_blocks;
 222         xfs_rtxlen_t            rtxlen;
 223
 224         rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
 225         rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen);
 226         return (rtbmp_blocks + 1) * num_ops;
 227 }
 228
 229 /*
 230  * Various log reservation values.
 231  *
 232  * These are based on the size of the file system block because that is what
 233  * most transactions manipulate.  Each adds in an additional 128 bytes per
 234  * item logged to try to account for the overhead of the transaction mechanism.
 235  *
 236  * Note:  Most of the reservations underestimate the number of allocation
 237  * groups into which they could free extents in the xfs_defer_finish() call.
 238  * This is because the number in the worst case is quite high and quite
 239  * unusual.  In order to fix this we need to change xfs_defer_finish() to free
 240  * extents in only a single AG at a time.  This will require changes to the
 241  * EFI code as well, however, so that the EFI for the extents not freed is
 242  * logged again in each transaction.  See SGI PV #261917.
 243  *
 244  * Reservation functions here avoid a huge stack in xfs_trans_init due to
 245  * register overflow from temporaries in the calculations.
 246  */
 247
 248 /*
 249  * Compute the log reservation required to handle the refcount update
 250  * transaction.  Refcount updates are always done via deferred log items.
 251  *
 252  * This is calculated as:
 253  * Data device refcount updates (t1):
 254  *    the agfs of the ags containing the blocks: nr_ops * sector size
 255  *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
 256  */
 257 static unsigned int
 258 xfs_calc_refcountbt_reservation(
 259         struct xfs_mount        *mp,
 260         unsigned int            nr_ops)
 261 {
 262         unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
 263
 264         if (!xfs_has_reflink(mp))
 265                 return 0;
 266
 267         return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
 268                xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
 269 }
 270
 271 /*
 272  * In a write transaction we can allocate a maximum of 2
 273  * extents.  This gives (t1):
 274  *    the inode getting the new extents: inode size
 275  *    the inode's bmap btree: max depth * block size
 276  *    the agfs of the ags from which the extents are allocated: 2 * sector
 277  *    the superblock free block counter: sector size
 278  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 279  * Or, if we're writing to a realtime file (t2):
 280  *    the inode getting the new extents: inode size
 281  *    the inode's bmap btree: max depth * block size
 282  *    the agfs of the ags from which the extents are allocated: 2 * sector
 283  *    the superblock free block counter: sector size
 284  *    the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
 285  *    the realtime summary: 1 block
 286  *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 287  * And the bmap_finish transaction can free bmap blocks in a join (t3):
 288  *    the agfs of the ags containing the blocks: 2 * sector size
 289  *    the agfls of the ags containing the blocks: 2 * sector size
 290  *    the super block free block counter: sector size
 291  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 292  * And any refcount updates that happen in a separate transaction (t4).
 293  */
 294 STATIC uint
 295 xfs_calc_write_reservation(
 296         struct xfs_mount        *mp,
 297         bool                    for_minlogsize)
 298 {
 299         unsigned int            t1, t2, t3, t4;
 300         unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
 301
 302         t1 = xfs_calc_inode_res(mp, 1) +
 303              xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
 304              xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 305              xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
 306
 307         if (xfs_has_realtime(mp)) {
 308                 t2 = xfs_calc_inode_res(mp, 1) +
 309                      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
 310                                      blksz) +
 311                      xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 312                      xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
 313                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
 314         } else {
 315                 t2 = 0;
 316         }
 317
 318         t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
 319              xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
 320
 321         /*
 322          * In the early days of reflink, we included enough reservation to log
 323          * two refcountbt splits for each transaction.  The codebase runs
 324          * refcountbt updates in separate transactions now, so to compute the
 325          * minimum log size, add the refcountbtree splits back to t1 and t3 and
 326          * do not account them separately as t4.  Reflink did not support
 327          * realtime when the reservations were established, so no adjustment to
 328          * t2 is needed.
 329          */
 330         if (for_minlogsize) {
 331                 unsigned int    adj = 0;
 332
 333                 if (xfs_has_reflink(mp))
 334                         adj = xfs_calc_buf_res(
 335                                         xfs_refcountbt_block_count(mp, 2),
 336                                         blksz);
 337                 t1 += adj;
 338                 t3 += adj;
 339                 return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 340         }
 341
 342         t4 = xfs_calc_refcountbt_reservation(mp, 1);
 343         return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
 344 }
 345
 346 unsigned int
 347 xfs_calc_write_reservation_minlogsize(
 348         struct xfs_mount        *mp)
 349 {
 350         return xfs_calc_write_reservation(mp, true);
 351 }
 352
 353 /*
 354  * In truncating a file we free up to two extents at once.  We can modify (t1):
 355  *    the inode being truncated: inode size
 356  *    the inode's bmap btree: (max depth + 1) * block size
 357  * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
 358  *    the agf for each of the ags: 4 * sector size
 359  *    the agfl for each of the ags: 4 * sector size
 360  *    the super block to reflect the freed blocks: sector size
 361  *    worst case split in allocation btrees per extent assuming 4 extents:
 362  *              4 exts * 2 trees * (2 * max depth - 1) * block size
 363  * Or, if it's a realtime file (t3):
 364  *    the agf for each of the ags: 2 * sector size
 365  *    the agfl for each of the ags: 2 * sector size
 366  *    the super block to reflect the freed blocks: sector size
 367  *    the realtime bitmap:
 368  *              2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
 369  *    the realtime summary: 2 exts * 1 block
 370  *    worst case split in allocation btrees per extent assuming 2 extents:
 371  *              2 exts * 2 trees * (2 * max depth - 1) * block size
 372  * And any refcount updates that happen in a separate transaction (t4).
 373  */
 374 STATIC uint
 375 xfs_calc_itruncate_reservation(
 376         struct xfs_mount        *mp,
 377         bool                    for_minlogsize)
 378 {
 379         unsigned int            t1, t2, t3, t4;
 380         unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
 381
 382         t1 = xfs_calc_inode_res(mp, 1) +
 383              xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
 384
 385         t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
 386              xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
 387
 388         if (xfs_has_realtime(mp)) {
 389                 t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
 390                      xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
 391                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
 392         } else {
 393                 t3 = 0;
 394         }
 395
 396         /*
 397          * In the early days of reflink, we included enough reservation to log
 398          * four refcountbt splits in the same transaction as bnobt/cntbt
 399          * updates.  The codebase runs refcountbt updates in separate
 400          * transactions now, so to compute the minimum log size, add the
 401          * refcount btree splits back here and do not compute them separately
 402          * as t4.  Reflink did not support realtime when the reservations were
 403          * established, so do not adjust t3.
 404          */
 405         if (for_minlogsize) {
 406                 if (xfs_has_reflink(mp))
 407                         t2 += xfs_calc_buf_res(
 408                                         xfs_refcountbt_block_count(mp, 4),
 409                                         blksz);
 410
 411                 return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 412         }
 413
 414         t4 = xfs_calc_refcountbt_reservation(mp, 2);
 415         return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
 416 }
 417
 418 unsigned int
 419 xfs_calc_itruncate_reservation_minlogsize(
 420         struct xfs_mount        *mp)
 421 {
 422         return xfs_calc_itruncate_reservation(mp, true);
 423 }
 424
 425 /*
 426  * In renaming a files we can modify:
 427  *    the five inodes involved: 5 * inode size
 428  *    the two directory btrees: 2 * (max depth + v2) * dir block size
 429  *    the two directory bmap btrees: 2 * max depth * block size
 430  * And the bmap_finish transaction can free dir and bmap blocks (two sets
 431  *      of bmap blocks) giving:
 432  *    the agf for the ags in which the blocks live: 3 * sector size
 433  *    the agfl for the ags in which the blocks live: 3 * sector size
 434  *    the superblock for the free block count: sector size
 435  *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
 436  */
 437 STATIC uint
 438 xfs_calc_rename_reservation(
 439         struct xfs_mount        *mp)
 440 {
 441         return XFS_DQUOT_LOGRES(mp) +
 442                 max((xfs_calc_inode_res(mp, 5) +
 443                      xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
 444                                       XFS_FSB_TO_B(mp, 1))),
 445                     (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
 446                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
 447                                       XFS_FSB_TO_B(mp, 1))));
 448 }
 449
 450 /*
 451  * For removing an inode from unlinked list at first, we can modify:
 452  *    the agi hash list and counters: sector size
 453  *    the on disk inode before ours in the agi hash list: inode cluster size
 454  *    the on disk inode in the agi hash list: inode cluster size
 455  */
 456 STATIC uint
 457 xfs_calc_iunlink_remove_reservation(
 458         struct xfs_mount        *mp)
 459 {
 460         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 461                2 * M_IGEO(mp)->inode_cluster_size;
 462 }
 463
 464 /*
 465  * For creating a link to an inode:
 466  *    the parent directory inode: inode size
 467  *    the linked inode: inode size
 468  *    the directory btree could split: (max depth + v2) * dir block size
 469  *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 470  * And the bmap_finish transaction can free some bmap blocks giving:
 471  *    the agf for the ag in which the blocks live: sector size
 472  *    the agfl for the ag in which the blocks live: sector size
 473  *    the superblock for the free block count: sector size
 474  *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 475  */
 476 STATIC uint
 477 xfs_calc_link_reservation(
 478         struct xfs_mount        *mp)
 479 {
 480         return XFS_DQUOT_LOGRES(mp) +
 481                 xfs_calc_iunlink_remove_reservation(mp) +
 482                 max((xfs_calc_inode_res(mp, 2) +
 483                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 484                                       XFS_FSB_TO_B(mp, 1))),
 485                     (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 486                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 487                                       XFS_FSB_TO_B(mp, 1))));
 488 }
 489
 490 /*
 491  * For adding an inode to unlinked list we can modify:
 492  *    the agi hash list: sector size
 493  *    the on disk inode: inode cluster size
 494  */
 495 STATIC uint
 496 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
 497 {
 498         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 499                         M_IGEO(mp)->inode_cluster_size;
 500 }
 501
 502 /*
 503  * For removing a directory entry we can modify:
 504  *    the parent directory inode: inode size
 505  *    the removed inode: inode size
 506  *    the directory btree could join: (max depth + v2) * dir block size
 507  *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 508  * And the bmap_finish transaction can free the dir and bmap blocks giving:
 509  *    the agf for the ag in which the blocks live: 2 * sector size
 510  *    the agfl for the ag in which the blocks live: 2 * sector size
 511  *    the superblock for the free block count: sector size
 512  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 513  */
 514 STATIC uint
 515 xfs_calc_remove_reservation(
 516         struct xfs_mount        *mp)
 517 {
 518         return XFS_DQUOT_LOGRES(mp) +
 519                 xfs_calc_iunlink_add_reservation(mp) +
 520                 max((xfs_calc_inode_res(mp, 2) +
 521                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 522                                       XFS_FSB_TO_B(mp, 1))),
 523                     (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
 524                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
 525                                       XFS_FSB_TO_B(mp, 1))));
 526 }
 527
/*
 * For create, break the reservation into the two cases that the transaction
 * covers.  We start with the modify case - allocation done by modification
 * of the state of existing inodes - followed by the allocation case.
 */
 533
 534 /*
 535  * For create we can modify:
 536  *    the parent directory inode: inode size
 537  *    the new inode: inode size
 538  *    the inode btree entry: block size
 539  *    the superblock for the nlink flag: sector size
 540  *    the directory btree: (max depth + v2) * dir block size
 541  *    the directory inode's bmap btree: (max depth + v2) * block size
 542  *    the finobt (record modification and allocation btrees)
 543  */
 544 STATIC uint
 545 xfs_calc_create_resv_modify(
 546         struct xfs_mount        *mp)
 547 {
 548         return xfs_calc_inode_res(mp, 2) +
 549                 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 550                 (uint)XFS_FSB_TO_B(mp, 1) +
 551                 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
 552                 xfs_calc_finobt_res(mp);
 553 }
 554
 555 /*
 556  * For icreate we can allocate some inodes giving:
 557  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
 558  *    the superblock for the nlink flag: sector size
 559  *    the inode chunk (allocation, optional init)
 560  *    the inobt (record insertion)
 561  *    the finobt (optional, record insertion)
 562  */
 563 STATIC uint
 564 xfs_calc_icreate_resv_alloc(
 565         struct xfs_mount        *mp)
 566 {
 567         return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 568                 mp->m_sb.sb_sectsize +
 569                 xfs_calc_inode_chunk_res(mp, _ALLOC) +
 570                 xfs_calc_inobt_res(mp) +
 571                 xfs_calc_finobt_res(mp);
 572 }
 573
 574 STATIC uint
 575 xfs_calc_icreate_reservation(xfs_mount_t *mp)
 576 {
 577         return XFS_DQUOT_LOGRES(mp) +
 578                 max(xfs_calc_icreate_resv_alloc(mp),
 579                     xfs_calc_create_resv_modify(mp));
 580 }
 581
 582 STATIC uint
 583 xfs_calc_create_tmpfile_reservation(
 584         struct xfs_mount        *mp)
 585 {
 586         uint    res = XFS_DQUOT_LOGRES(mp);
 587
 588         res += xfs_calc_icreate_resv_alloc(mp);
 589         return res + xfs_calc_iunlink_add_reservation(mp);
 590 }
 591
 592 /*
 593  * Making a new directory is the same as creating a new file.
 594  */
 595 STATIC uint
 596 xfs_calc_mkdir_reservation(
 597         struct xfs_mount        *mp)
 598 {
 599         return xfs_calc_icreate_reservation(mp);
 600 }
 601
 602
 603 /*
 604  * Making a new symplink is the same as creating a new file, but
 605  * with the added blocks for remote symlink data which can be up to 1kB in
 606  * length (XFS_SYMLINK_MAXLEN).
 607  */
 608 STATIC uint
 609 xfs_calc_symlink_reservation(
 610         struct xfs_mount        *mp)
 611 {
 612         return xfs_calc_icreate_reservation(mp) +
 613                xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
 614 }
 615
 616 /*
 617  * In freeing an inode we can modify:
 618  *    the inode being freed: inode size
 619  *    the super block free inode counter, AGF and AGFL: sector size
 620  *    the on disk inode (agi unlinked list removal)
 621  *    the inode chunk (invalidated, headers only)
 622  *    the inode btree
 623  *    the finobt (record insertion, removal or modification)
 624  *
 625  * Note that the inode chunk res. includes an allocfree res. for freeing of the
 626  * inode chunk. This is technically extraneous because the inode chunk free is
 627  * deferred (it occurs after a transaction roll). Include the extra reservation
 628  * anyways since we've had reports of ifree transaction overruns due to too many
 629  * agfl fixups during inode chunk frees.
 630  */
 631 STATIC uint
 632 xfs_calc_ifree_reservation(
 633         struct xfs_mount        *mp)
 634 {
 635         return XFS_DQUOT_LOGRES(mp) +
 636                 xfs_calc_inode_res(mp, 1) +
 637                 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 638                 xfs_calc_iunlink_remove_reservation(mp) +
 639                 xfs_calc_inode_chunk_res(mp, _FREE) +
 640                 xfs_calc_inobt_res(mp) +
 641                 xfs_calc_finobt_res(mp);
 642 }
 643
 644 /*
 645  * When only changing the inode we log the inode and possibly the superblock
 646  * We also add a bit of slop for the transaction stuff.
 647  */
 648 STATIC uint
 649 xfs_calc_ichange_reservation(
 650         struct xfs_mount        *mp)
 651 {
 652         return XFS_DQUOT_LOGRES(mp) +
 653                 xfs_calc_inode_res(mp, 1) +
 654                 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
 655
 656 }
 657
 658 /*
 659  * Growing the data section of the filesystem.
 660  *      superblock
 661  *      agi and agf
 662  *      allocation btrees
 663  */
 664 STATIC uint
 665 xfs_calc_growdata_reservation(
 666         struct xfs_mount        *mp)
 667 {
 668         return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 669                 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 670                                  XFS_FSB_TO_B(mp, 1));
 671 }
 672
 673 /*
 674  * Growing the rt section of the filesystem.
 675  * In the first set of transactions (ALLOC) we allocate space to the
 676  * bitmap or summary files.
 677  *      superblock: sector size
 678  *      agf of the ag from which the extent is allocated: sector size
 679  *      bmap btree for bitmap/summary inode: max depth * blocksize
 680  *      bitmap/summary inode: inode size
 681  *      allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
 682  */
 683 STATIC uint
 684 xfs_calc_growrtalloc_reservation(
 685         struct xfs_mount        *mp)
 686 {
 687         return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 688                 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
 689                                  XFS_FSB_TO_B(mp, 1)) +
 690                 xfs_calc_inode_res(mp, 1) +
 691                 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 692                                  XFS_FSB_TO_B(mp, 1));
 693 }
 694
 695 /*
 696  * Growing the rt section of the filesystem.
 697  * In the second set of transactions (ZERO) we zero the new metadata blocks.
 698  *      one bitmap/summary block: blocksize
 699  */
 700 STATIC uint
 701 xfs_calc_growrtzero_reservation(
 702         struct xfs_mount        *mp)
 703 {
 704         return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
 705 }
 706
 707 /*
 708  * Growing the rt section of the filesystem.
 709  * In the third set of transactions (FREE) we update metadata without
 710  * allocating any new blocks.
 711  *      superblock: sector size
 712  *      bitmap inode: inode size
 713  *      summary inode: inode size
 714  *      one bitmap block: blocksize
 715  *      summary blocks: new summary size
 716  */
 717 STATIC uint
 718 xfs_calc_growrtfree_reservation(
 719         struct xfs_mount        *mp)
 720 {
 721         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 722                 xfs_calc_inode_res(mp, 2) +
 723                 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
 724                 xfs_calc_buf_res(1, mp->m_rsumsize);
 725 }
 726
 727 /*
 728  * Logging the inode modification timestamp on a synchronous write.
 729  *      inode
 730  */
 731 STATIC uint
 732 xfs_calc_swrite_reservation(
 733         struct xfs_mount        *mp)
 734 {
 735         return xfs_calc_inode_res(mp, 1);
 736 }
 737
 738 /*
 739  * Logging the inode mode bits when writing a setuid/setgid file
 740  *      inode
 741  */
 742 STATIC uint
 743 xfs_calc_writeid_reservation(
 744         struct xfs_mount        *mp)
 745 {
 746         return xfs_calc_inode_res(mp, 1);
 747 }
 748
 749 /*
 750  * Converting the inode from non-attributed to attributed.
 751  *      the inode being converted: inode size
 752  *      agf block and superblock (for block allocation)
 753  *      the new block (directory sized)
 754  *      bmap blocks for the new directory block
 755  *      allocation btrees
 756  */
 757 STATIC uint
 758 xfs_calc_addafork_reservation(
 759         struct xfs_mount        *mp)
 760 {
 761         return XFS_DQUOT_LOGRES(mp) +
 762                 xfs_calc_inode_res(mp, 1) +
 763                 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 764                 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
 765                 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
 766                                  XFS_FSB_TO_B(mp, 1)) +
 767                 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
 768                                  XFS_FSB_TO_B(mp, 1));
 769 }
 770
 771 /*
 772  * Removing the attribute fork of a file
 773  *    the inode being truncated: inode size
 774  *    the inode's bmap btree: max depth * block size
 775  * And the bmap_finish transaction can free the blocks and bmap blocks:
 776  *    the agf for each of the ags: 4 * sector size
 777  *    the agfl for each of the ags: 4 * sector size
 778  *    the super block to reflect the freed blocks: sector size
 779  *    worst case split in allocation btrees per extent assuming 4 extents:
 780  *              4 exts * 2 trees * (2 * max depth - 1) * block size
 781  */
 782 STATIC uint
 783 xfs_calc_attrinval_reservation(
 784         struct xfs_mount        *mp)
 785 {
 786         return max((xfs_calc_inode_res(mp, 1) +
 787                     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
 788                                      XFS_FSB_TO_B(mp, 1))),
 789                    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
 790                     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
 791                                      XFS_FSB_TO_B(mp, 1))));
 792 }
 793
 794 /*
 795  * Setting an attribute at mount time.
 796  *      the inode getting the attribute
 797  *      the superblock for allocations
 798  *      the agfs extents are allocated from
 799  *      the attribute btree * max depth
 800  *      the inode allocation btree
 801  * Since attribute transaction space is dependent on the size of the attribute,
 802  * the calculation is done partially at mount time and partially at runtime(see
 803  * below).
 804  */
 805 STATIC uint
 806 xfs_calc_attrsetm_reservation(
 807         struct xfs_mount        *mp)
 808 {
 809         return XFS_DQUOT_LOGRES(mp) +
 810                 xfs_calc_inode_res(mp, 1) +
 811                 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 812                 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
 813 }
 814
 815 /*
 816  * Setting an attribute at runtime, transaction space unit per block.
 817  *      the superblock for allocations: sector size
 818  *      the inode bmap btree could join or split: max depth * block size
 819  * Since the runtime attribute transaction space is dependent on the total
 820  * blocks needed for the 1st bmap, here we calculate out the space unit for
 821  * one block so that the caller could figure out the total space according
 822  * to the attibute extent length in blocks by:
 823  *      ext * M_RES(mp)->tr_attrsetrt.tr_logres
 824  */
 825 STATIC uint
 826 xfs_calc_attrsetrt_reservation(
 827         struct xfs_mount        *mp)
 828 {
 829         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 830                 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
 831                                  XFS_FSB_TO_B(mp, 1));
 832 }
 833
 834 /*
 835  * Removing an attribute.
 836  *    the inode: inode size
 837  *    the attribute btree could join: max depth * block size
 838  *    the inode bmap btree could join or split: max depth * block size
 839  * And the bmap_finish transaction can free the attr blocks freed giving:
 840  *    the agf for the ag in which the blocks live: 2 * sector size
 841  *    the agfl for the ag in which the blocks live: 2 * sector size
 842  *    the superblock for the free block count: sector size
 843  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 844  */
 845 STATIC uint
 846 xfs_calc_attrrm_reservation(
 847         struct xfs_mount        *mp)
 848 {
 849         return XFS_DQUOT_LOGRES(mp) +
 850                 max((xfs_calc_inode_res(mp, 1) +
 851                      xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
 852                                       XFS_FSB_TO_B(mp, 1)) +
 853                      (uint)XFS_FSB_TO_B(mp,
 854                                         XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
 855                      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
 856                     (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
 857                      xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
 858                                       XFS_FSB_TO_B(mp, 1))));
 859 }
 860
 861 /*
 862  * Clearing a bad agino number in an agi hash bucket.
 863  */
 864 STATIC uint
 865 xfs_calc_clear_agi_bucket_reservation(
 866         struct xfs_mount        *mp)
 867 {
 868         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
 869 }
 870
 871 /*
 872  * Adjusting quota limits.
 873  *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
 874  */
 875 STATIC uint
 876 xfs_calc_qm_setqlim_reservation(void)
 877 {
 878         return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
 879 }
 880
 881 /*
 882  * Allocating quota on disk if needed.
 883  *      the write transaction log space for quota file extent allocation
 884  *      the unit of quota allocation: one system block size
 885  */
 886 STATIC uint
 887 xfs_calc_qm_dqalloc_reservation(
 888         struct xfs_mount        *mp,
 889         bool                    for_minlogsize)
 890 {
 891         return xfs_calc_write_reservation(mp, for_minlogsize) +
 892                 xfs_calc_buf_res(1,
 893                         XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 894 }
 895
 896 unsigned int
 897 xfs_calc_qm_dqalloc_reservation_minlogsize(
 898         struct xfs_mount        *mp)
 899 {
 900         return xfs_calc_qm_dqalloc_reservation(mp, true);
 901 }
 902
 903 /*
 904  * Syncing the incore super block changes to disk.
 905  *     the super block to reflect the changes: sector size
 906  */
 907 STATIC uint
 908 xfs_calc_sb_reservation(
 909         struct xfs_mount        *mp)
 910 {
 911         return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
 912 }
 913
 914 void
 915 xfs_trans_resv_calc(
 916         struct xfs_mount        *mp,
 917         struct xfs_trans_resv   *resp)
 918 {
 919         int                     logcount_adj = 0;
 920
 921         /*
 922          * The following transactions are logged in physical format and
 923          * require a permanent reservation on space.
 924          */
 925         resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
 926         resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
 927         resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 928
 929         resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
 930         resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
 931         resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 932
 933         resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
 934         resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
 935         resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 936
 937         resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
 938         resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
 939         resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 940
 941         resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
 942         resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
 943         resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 944
 945         resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
 946         resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
 947         resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 948
 949         resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
 950         resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
 951         resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 952
 953         resp->tr_create_tmpfile.tr_logres =
 954                         xfs_calc_create_tmpfile_reservation(mp);
 955         resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
 956         resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 957
 958         resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
 959         resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
 960         resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 961
 962         resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
 963         resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
 964         resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 965
 966         resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
 967         resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
 968         resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 969
 970         resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
 971         resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
 972         resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 973
 974         resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
 975         resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
 976         resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 977
 978         resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
 979         resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
 980         resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 981
 982         resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
 983         resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
 984         resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 985
 986         resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
 987                         false);
 988         resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
 989         resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 990
 991         /*
 992          * The following transactions are logged in logical format with
 993          * a default log count.
 994          */
 995         resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
 996         resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 997
 998         resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
 999         resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
1000
1001         /* growdata requires permanent res; it can free space to the last AG */
1002         resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
1003         resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
1004         resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1005
1006         /* The following transaction are logged in logical format */
1007         resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
1008         resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
1009         resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
1010         resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
1011         resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
1012         resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
1013         resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
1014
1015         /*
1016          * Add one logcount for BUI items that appear with rmap or reflink,
1017          * one logcount for refcount intent items, and one logcount for rmap
1018          * intent items.
1019          */
1020         if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
1021                 logcount_adj++;
1022         if (xfs_has_reflink(mp))
1023                 logcount_adj++;
1024         if (xfs_has_rmapbt(mp))
1025                 logcount_adj++;
1026
1027         resp->tr_itruncate.tr_logcount += logcount_adj;
1028         resp->tr_write.tr_logcount += logcount_adj;
1029         resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
1030 }