fs/xfs/xfs_bmap_item.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_format.h"
   9 #include "xfs_log_format.h"
  10 #include "xfs_trans_resv.h"
  11 #include "xfs_bit.h"
  12 #include "xfs_shared.h"
  13 #include "xfs_mount.h"
  14 #include "xfs_defer.h"
  15 #include "xfs_inode.h"
  16 #include "xfs_trans.h"
  17 #include "xfs_trans_priv.h"
  18 #include "xfs_bmap_item.h"
  19 #include "xfs_log.h"
  20 #include "xfs_bmap.h"
  21 #include "xfs_icache.h"
  22 #include "xfs_bmap_btree.h"
  23 #include "xfs_trans_space.h"
  24 #include "xfs_error.h"
  25 #include "xfs_quota.h"
  26
  27 kmem_zone_t     *xfs_bui_zone;
  28 kmem_zone_t     *xfs_bud_zone;
  29
  30 static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
  31 {
  32         return container_of(lip, struct xfs_bui_log_item, bui_item);
  33 }
  34
  35 void
  36 xfs_bui_item_free(
  37         struct xfs_bui_log_item *buip)
  38 {
  39         kmem_zone_free(xfs_bui_zone, buip);
  40 }
  41
  42 /*
  43  * Freeing the BUI requires that we remove it from the AIL if it has already
  44  * been placed there. However, the BUI may not yet have been placed in the AIL
  45  * when called by xfs_bui_release() from BUD processing due to the ordering of
  46  * committed vs unpin operations in bulk insert operations. Hence the reference
  47  * count to ensure only the last caller frees the BUI.
  48  */
  49 void
  50 xfs_bui_release(
  51         struct xfs_bui_log_item *buip)
  52 {
  53         ASSERT(atomic_read(&buip->bui_refcount) > 0);
  54         if (atomic_dec_and_test(&buip->bui_refcount)) {
  55                 xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
  56                 xfs_bui_item_free(buip);
  57         }
  58 }
  59
  60
  61 STATIC void
  62 xfs_bui_item_size(
  63         struct xfs_log_item     *lip,
  64         int                     *nvecs,
  65         int                     *nbytes)
  66 {
  67         struct xfs_bui_log_item *buip = BUI_ITEM(lip);
  68
  69         *nvecs += 1;
  70         *nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents);
  71 }
  72
  73 /*
  74  * This is called to fill in the vector of log iovecs for the
  75  * given bui log item. We use only 1 iovec, and we point that
  76  * at the bui_log_format structure embedded in the bui item.
  77  * It is at this point that we assert that all of the extent
  78  * slots in the bui item have been filled.
  79  */
  80 STATIC void
  81 xfs_bui_item_format(
  82         struct xfs_log_item     *lip,
  83         struct xfs_log_vec      *lv)
  84 {
  85         struct xfs_bui_log_item *buip = BUI_ITEM(lip);
  86         struct xfs_log_iovec    *vecp = NULL;
  87
  88         ASSERT(atomic_read(&buip->bui_next_extent) ==
  89                         buip->bui_format.bui_nextents);
  90
  91         buip->bui_format.bui_type = XFS_LI_BUI;
  92         buip->bui_format.bui_size = 1;
  93
  94         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format,
  95                         xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
  96 }
  97
  98 /*
  99  * The unpin operation is the last place an BUI is manipulated in the log. It is
 100  * either inserted in the AIL or aborted in the event of a log I/O error. In
 101  * either case, the BUI transaction has been successfully committed to make it
 102  * this far. Therefore, we expect whoever committed the BUI to either construct
 103  * and commit the BUD or drop the BUD's reference in the event of error. Simply
 104  * drop the log's BUI reference now that the log is done with it.
 105  */
 106 STATIC void
 107 xfs_bui_item_unpin(
 108         struct xfs_log_item     *lip,
 109         int                     remove)
 110 {
 111         struct xfs_bui_log_item *buip = BUI_ITEM(lip);
 112
 113         xfs_bui_release(buip);
 114 }
 115
 116 /*
 117  * The BUI has been either committed or aborted if the transaction has been
 118  * cancelled. If the transaction was cancelled, an BUD isn't going to be
 119  * constructed and thus we free the BUI here directly.
 120  */
 121 STATIC void
 122 xfs_bui_item_release(
 123         struct xfs_log_item     *lip)
 124 {
 125         xfs_bui_release(BUI_ITEM(lip));
 126 }
 127
 128 static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip)
 129 {
 130         return container_of(lip, struct xfs_bud_log_item, bud_item);
 131 }
 132
 133 STATIC void
 134 xfs_bud_item_size(
 135         struct xfs_log_item     *lip,
 136         int                     *nvecs,
 137         int                     *nbytes)
 138 {
 139         *nvecs += 1;
 140         *nbytes += sizeof(struct xfs_bud_log_format);
 141 }
 142
 143 /*
 144  * This is called to fill in the vector of log iovecs for the
 145  * given bud log item. We use only 1 iovec, and we point that
 146  * at the bud_log_format structure embedded in the bud item.
 147  * It is at this point that we assert that all of the extent
 148  * slots in the bud item have been filled.
 149  */
 150 STATIC void
 151 xfs_bud_item_format(
 152         struct xfs_log_item     *lip,
 153         struct xfs_log_vec      *lv)
 154 {
 155         struct xfs_bud_log_item *budp = BUD_ITEM(lip);
 156         struct xfs_log_iovec    *vecp = NULL;
 157
 158         budp->bud_format.bud_type = XFS_LI_BUD;
 159         budp->bud_format.bud_size = 1;
 160
 161         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format,
 162                         sizeof(struct xfs_bud_log_format));
 163 }
 164
 165 /*
 166  * The BUD is either committed or aborted if the transaction is cancelled. If
 167  * the transaction is cancelled, drop our reference to the BUI and free the
 168  * BUD.
 169  */
 170 STATIC void
 171 xfs_bud_item_release(
 172         struct xfs_log_item     *lip)
 173 {
 174         struct xfs_bud_log_item *budp = BUD_ITEM(lip);
 175
 176         xfs_bui_release(budp->bud_buip);
 177         kmem_zone_free(xfs_bud_zone, budp);
 178 }
 179
 180 static const struct xfs_item_ops xfs_bud_item_ops = {
 181         .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
 182         .iop_size       = xfs_bud_item_size,
 183         .iop_format     = xfs_bud_item_format,
 184         .iop_release    = xfs_bud_item_release,
 185 };
 186
 187 static struct xfs_bud_log_item *
 188 xfs_trans_get_bud(
 189         struct xfs_trans                *tp,
 190         struct xfs_bui_log_item         *buip)
 191 {
 192         struct xfs_bud_log_item         *budp;
 193
 194         budp = kmem_zone_zalloc(xfs_bud_zone, 0);
 195         xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
 196                           &xfs_bud_item_ops);
 197         budp->bud_buip = buip;
 198         budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
 199
 200         xfs_trans_add_item(tp, &budp->bud_item);
 201         return budp;
 202 }
 203
 204 /*
 205  * Finish an bmap update and log it to the BUD. Note that the
 206  * transaction is marked dirty regardless of whether the bmap update
 207  * succeeds or fails to support the BUI/BUD lifecycle rules.
 208  */
 209 static int
 210 xfs_trans_log_finish_bmap_update(
 211         struct xfs_trans                *tp,
 212         struct xfs_bud_log_item         *budp,
 213         enum xfs_bmap_intent_type       type,
 214         struct xfs_inode                *ip,
 215         int                             whichfork,
 216         xfs_fileoff_t                   startoff,
 217         xfs_fsblock_t                   startblock,
 218         xfs_filblks_t                   *blockcount,
 219         xfs_exntst_t                    state)
 220 {
 221         int                             error;
 222
 223         error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
 224                         startblock, blockcount, state);
 225
 226         /*
 227          * Mark the transaction dirty, even on error. This ensures the
 228          * transaction is aborted, which:
 229          *
 230          * 1.) releases the BUI and frees the BUD
 231          * 2.) shuts down the filesystem
 232          */
 233         tp->t_flags |= XFS_TRANS_DIRTY;
 234         set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
 235
 236         return error;
 237 }
 238
 239 /* Sort bmap intents by inode. */
 240 static int
 241 xfs_bmap_update_diff_items(
 242         void                            *priv,
 243         struct list_head                *a,
 244         struct list_head                *b)
 245 {
 246         struct xfs_bmap_intent          *ba;
 247         struct xfs_bmap_intent          *bb;
 248
 249         ba = container_of(a, struct xfs_bmap_intent, bi_list);
 250         bb = container_of(b, struct xfs_bmap_intent, bi_list);
 251         return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
 252 }
 253
 254 /* Set the map extent flags for this mapping. */
 255 static void
 256 xfs_trans_set_bmap_flags(
 257         struct xfs_map_extent           *bmap,
 258         enum xfs_bmap_intent_type       type,
 259         int                             whichfork,
 260         xfs_exntst_t                    state)
 261 {
 262         bmap->me_flags = 0;
 263         switch (type) {
 264         case XFS_BMAP_MAP:
 265         case XFS_BMAP_UNMAP:
 266                 bmap->me_flags = type;
 267                 break;
 268         default:
 269                 ASSERT(0);
 270         }
 271         if (state == XFS_EXT_UNWRITTEN)
 272                 bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
 273         if (whichfork == XFS_ATTR_FORK)
 274                 bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
 275 }
 276
 277 /* Log bmap updates in the intent item. */
 278 STATIC void
 279 xfs_bmap_update_log_item(
 280         struct xfs_trans                *tp,
 281         struct xfs_bui_log_item         *buip,
 282         struct xfs_bmap_intent          *bmap)
 283 {
 284         uint                            next_extent;
 285         struct xfs_map_extent           *map;
 286
 287         tp->t_flags |= XFS_TRANS_DIRTY;
 288         set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
 289
 290         /*
 291          * atomic_inc_return gives us the value after the increment;
 292          * we want to use it as an array index so we need to subtract 1 from
 293          * it.
 294          */
 295         next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
 296         ASSERT(next_extent < buip->bui_format.bui_nextents);
 297         map = &buip->bui_format.bui_extents[next_extent];
 298         map->me_owner = bmap->bi_owner->i_ino;
 299         map->me_startblock = bmap->bi_bmap.br_startblock;
 300         map->me_startoff = bmap->bi_bmap.br_startoff;
 301         map->me_len = bmap->bi_bmap.br_blockcount;
 302         xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
 303                         bmap->bi_bmap.br_state);
 304 }
 305
 306 static struct xfs_log_item *
 307 xfs_bmap_update_create_intent(
 308         struct xfs_trans                *tp,
 309         struct list_head                *items,
 310         unsigned int                    count,
 311         bool                            sort)
 312 {
 313         struct xfs_mount                *mp = tp->t_mountp;
 314         struct xfs_bui_log_item         *buip = xfs_bui_init(mp);
 315         struct xfs_bmap_intent          *bmap;
 316
 317         ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
 318
 319         xfs_trans_add_item(tp, &buip->bui_item);
 320         if (sort)
 321                 list_sort(mp, items, xfs_bmap_update_diff_items);
 322         list_for_each_entry(bmap, items, bi_list)
 323                 xfs_bmap_update_log_item(tp, buip, bmap);
 324         return &buip->bui_item;
 325 }
 326
 327 /* Get an BUD so we can process all the deferred rmap updates. */
 328 STATIC void *
 329 xfs_bmap_update_create_done(
 330         struct xfs_trans                *tp,
 331         struct xfs_log_item             *intent,
 332         unsigned int                    count)
 333 {
 334         return xfs_trans_get_bud(tp, BUI_ITEM(intent));
 335 }
 336
 337 /* Process a deferred rmap update. */
 338 STATIC int
 339 xfs_bmap_update_finish_item(
 340         struct xfs_trans                *tp,
 341         struct list_head                *item,
 342         void                            *done_item,
 343         void                            **state)
 344 {
 345         struct xfs_bmap_intent          *bmap;
 346         xfs_filblks_t                   count;
 347         int                             error;
 348
 349         bmap = container_of(item, struct xfs_bmap_intent, bi_list);
 350         count = bmap->bi_bmap.br_blockcount;
 351         error = xfs_trans_log_finish_bmap_update(tp, done_item,
 352                         bmap->bi_type,
 353                         bmap->bi_owner, bmap->bi_whichfork,
 354                         bmap->bi_bmap.br_startoff,
 355                         bmap->bi_bmap.br_startblock,
 356                         &count,
 357                         bmap->bi_bmap.br_state);
 358         if (!error && count > 0) {
 359                 ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
 360                 bmap->bi_bmap.br_blockcount = count;
 361                 return -EAGAIN;
 362         }
 363         kmem_free(bmap);
 364         return error;
 365 }
 366
 367 /* Abort all pending BUIs. */
 368 STATIC void
 369 xfs_bmap_update_abort_intent(
 370         struct xfs_log_item             *intent)
 371 {
 372         xfs_bui_release(BUI_ITEM(intent));
 373 }
 374
 375 /* Cancel a deferred rmap update. */
 376 STATIC void
 377 xfs_bmap_update_cancel_item(
 378         struct list_head                *item)
 379 {
 380         struct xfs_bmap_intent          *bmap;
 381
 382         bmap = container_of(item, struct xfs_bmap_intent, bi_list);
 383         kmem_free(bmap);
 384 }
 385
 386 const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
 387         .max_items      = XFS_BUI_MAX_FAST_EXTENTS,
 388         .create_intent  = xfs_bmap_update_create_intent,
 389         .abort_intent   = xfs_bmap_update_abort_intent,
 390         .create_done    = xfs_bmap_update_create_done,
 391         .finish_item    = xfs_bmap_update_finish_item,
 392         .cancel_item    = xfs_bmap_update_cancel_item,
 393 };
 394
 395 /*
 396  * Process a bmap update intent item that was recovered from the log.
 397  * We need to update some inode's bmbt.
 398  */
 399 int
 400 xfs_bui_recover(
 401         struct xfs_bui_log_item         *buip,
 402         struct list_head                *capture_list)
 403 {
 404         int                             error = 0;
 405         unsigned int                    bui_type;
 406         struct xfs_map_extent           *bmap;
 407         xfs_fsblock_t                   startblock_fsb;
 408         xfs_fsblock_t                   inode_fsb;
 409         xfs_filblks_t                   count;
 410         struct xfs_bud_log_item         *budp;
 411         int                             whichfork;
 412         xfs_exntst_t                    state;
 413         struct xfs_trans                *tp;
 414         struct xfs_inode                *ip = NULL;
 415         struct xfs_bmbt_irec            irec;
 416         struct xfs_mount                *mp = buip->bui_item.li_mountp;
 417
 418         ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
 419
 420         /* Only one mapping operation per BUI... */
 421         if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
 422                 set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
 423                 xfs_bui_release(buip);
 424                 return -EFSCORRUPTED;
 425         }
 426
 427         /*
 428          * First check the validity of the extent described by the
 429          * BUI.  If anything is bad, then toss the BUI.
 430          */
 431         bmap = &buip->bui_format.bui_extents[0];
 432         startblock_fsb = XFS_BB_TO_FSB(mp,
 433                            XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
 434         inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
 435                         XFS_INO_TO_FSB(mp, bmap->me_owner)));
 436         state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
 437                         XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
 438         whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
 439                         XFS_ATTR_FORK : XFS_DATA_FORK;
 440         bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
 441         switch (bui_type) {
 442         case XFS_BMAP_MAP:
 443         case XFS_BMAP_UNMAP:
 444                 break;
 445         default:
 446                 return -EFSCORRUPTED;
 447         }
 448         if (startblock_fsb == 0 ||
 449             bmap->me_len == 0 ||
 450             inode_fsb == 0 ||
 451             startblock_fsb >= mp->m_sb.sb_dblocks ||
 452             bmap->me_len >= mp->m_sb.sb_agblocks ||
 453             inode_fsb >= mp->m_sb.sb_dblocks ||
 454             (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
 455                 /*
 456                  * This will pull the BUI from the AIL and
 457                  * free the memory associated with it.
 458                  */
 459                 set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
 460                 xfs_bui_release(buip);
 461                 return -EFSCORRUPTED;
 462         }
 463
 464         /* Grab the inode. */
 465         error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
 466         if (error)
 467                 return error;
 468
 469         error = xfs_qm_dqattach(ip);
 470         if (error)
 471                 goto err_rele;
 472
 473         if (VFS_I(ip)->i_nlink == 0)
 474                 xfs_iflags_set(ip, XFS_IRECOVERY);
 475
 476         /* Allocate transaction and do the work. */
 477         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
 478                         XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
 479         if (error)
 480                 goto err_rele;
 481
 482         budp = xfs_trans_get_bud(tp, buip);
 483         xfs_ilock(ip, XFS_ILOCK_EXCL);
 484         xfs_trans_ijoin(tp, ip, 0);
 485
 486         count = bmap->me_len;
 487         error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
 488                         whichfork, bmap->me_startoff, bmap->me_startblock,
 489                         &count, state);
 490         if (error)
 491                 goto err_cancel;
 492
 493         if (count > 0) {
 494                 ASSERT(bui_type == XFS_BMAP_UNMAP);
 495                 irec.br_startblock = bmap->me_startblock;
 496                 irec.br_blockcount = count;
 497                 irec.br_startoff = bmap->me_startoff;
 498                 irec.br_state = state;
 499                 xfs_bmap_unmap_extent(tp, ip, &irec);
 500         }
 501
 502         set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
 503         /*
 504          * Commit transaction, which frees the transaction and saves the inode
 505          * for later replay activities.
 506          */
 507         error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
 508         if (error)
 509                 goto err_unlock;
 510
 511         xfs_iunlock(ip, XFS_ILOCK_EXCL);
 512         xfs_irele(ip);
 513         return 0;
 514
 515 err_cancel:
 516         xfs_trans_cancel(tp);
 517 err_unlock:
 518         xfs_iunlock(ip, XFS_ILOCK_EXCL);
 519 err_rele:
 520         xfs_irele(ip);
 521         return error;
 522 }
 523
 524 /* Relog an intent item to push the log tail forward. */
 525 static struct xfs_log_item *
 526 xfs_bui_item_relog(
 527         struct xfs_log_item             *intent,
 528         struct xfs_trans                *tp)
 529 {
 530         struct xfs_bud_log_item         *budp;
 531         struct xfs_bui_log_item         *buip;
 532         struct xfs_map_extent           *extp;
 533         unsigned int                    count;
 534
 535         count = BUI_ITEM(intent)->bui_format.bui_nextents;
 536         extp = BUI_ITEM(intent)->bui_format.bui_extents;
 537
 538         tp->t_flags |= XFS_TRANS_DIRTY;
 539         budp = xfs_trans_get_bud(tp, BUI_ITEM(intent));
 540         set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
 541
 542         buip = xfs_bui_init(tp->t_mountp);
 543         memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp));
 544         atomic_set(&buip->bui_next_extent, count);
 545         xfs_trans_add_item(tp, &buip->bui_item);
 546         set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
 547         return &buip->bui_item;
 548 }
 549
 550 static const struct xfs_item_ops xfs_bui_item_ops = {
 551         .iop_size       = xfs_bui_item_size,
 552         .iop_format     = xfs_bui_item_format,
 553         .iop_unpin      = xfs_bui_item_unpin,
 554         .iop_release    = xfs_bui_item_release,
 555         .iop_relog      = xfs_bui_item_relog,
 556 };
 557
 558 /*
 559  * Allocate and initialize an bui item with the given number of extents.
 560  */
 561 struct xfs_bui_log_item *
 562 xfs_bui_init(
 563         struct xfs_mount                *mp)
 564
 565 {
 566         struct xfs_bui_log_item         *buip;
 567
 568         buip = kmem_zone_zalloc(xfs_bui_zone, 0);
 569
 570         xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops);
 571         buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS;
 572         buip->bui_format.bui_id = (uintptr_t)(void *)buip;
 573         atomic_set(&buip->bui_next_extent, 0);
 574         atomic_set(&buip->bui_refcount, 2);
 575
 576         return buip;
 577 }