fs/btrfs/discard.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include <linux/jiffies.h>
   4 #include <linux/kernel.h>
   5 #include <linux/ktime.h>
   6 #include <linux/list.h>
   7 #include <linux/math64.h>
   8 #include <linux/sizes.h>
   9 #include <linux/workqueue.h>
  10 #include "ctree.h"
  11 #include "block-group.h"
  12 #include "discard.h"
  13 #include "free-space-cache.h"
  14
  15 /*
  16  * This contains the logic to handle async discard.
  17  *
  18  * Async discard manages trimming of free space outside of transaction commit.
  19  * Discarding is done by managing the block_groups on a LRU list based on free
  20  * space recency.  Two passes are used to first prioritize discarding extents
  21  * and then allow for trimming in the bitmap the best opportunity to coalesce.
  22  * The block_groups are maintained on multiple lists to allow for multiple
  23  * passes with different discard filter requirements.  A delayed work item is
  24  * used to manage discarding with timeout determined by a max of the delay
  25  * incurred by the iops rate limit, the byte rate limit, and the max delay of
  26  * BTRFS_DISCARD_MAX_DELAY.
  27  *
  28  * Note, this only keeps track of block_groups that are explicitly for data.
  29  * Mixed block_groups are not supported.
  30  *
  31  * The first list is special to manage discarding of fully free block groups.
  32  * This is necessary because we issue a final trim for a full free block group
  33  * after forgetting it.  When a block group becomes unused, instead of directly
  34  * being added to the unused_bgs list, we add it to this first list.  Then
  35  * from there, if it becomes fully discarded, we place it onto the unused_bgs
  36  * list.
  37  *
  38  * The in-memory free space cache serves as the backing state for discard.
  39  * Consequently this means there is no persistence.  We opt to load all the
  40  * block groups in as not discarded, so the mount case degenerates to the
  41  * crashing case.
  42  *
  43  * As the free space cache uses bitmaps, there exists a tradeoff between
  44  * ease/efficiency for find_free_extent() and the accuracy of discard state.
  45  * Here we opt to let untrimmed regions merge with everything while only letting
  46  * trimmed regions merge with other trimmed regions.  This can cause
  47  * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
  48  * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
  49  * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
  50  * this resets the state and we will retry trimming the whole bitmap.  This is a
  51  * tradeoff between discard state accuracy and the cost of accounting.
  52  */
  53
  54 /* This is an initial delay to give some chance for block reuse */
  55 #define BTRFS_DISCARD_DELAY             (120ULL * NSEC_PER_SEC)
  56 #define BTRFS_DISCARD_UNUSED_DELAY      (10ULL * NSEC_PER_SEC)
  57
  58 /* Target completion latency of discarding all discardable extents */
  59 #define BTRFS_DISCARD_TARGET_MSEC       (6 * 60 * 60UL * MSEC_PER_SEC)
  60 #define BTRFS_DISCARD_MIN_DELAY_MSEC    (1UL)
  61 #define BTRFS_DISCARD_MAX_DELAY_MSEC    (1000UL)
  62 #define BTRFS_DISCARD_MAX_IOPS          (10U)
  63
  64 /* Montonically decreasing minimum length filters after index 0 */
  65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
  66         0,
  67         BTRFS_ASYNC_DISCARD_MAX_FILTER,
  68         BTRFS_ASYNC_DISCARD_MIN_FILTER
  69 };
  70
  71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
  72                                           struct btrfs_block_group *block_group)
  73 {
  74         return &discard_ctl->discard_list[block_group->discard_index];
  75 }
  76
  77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  78                                   struct btrfs_block_group *block_group)
  79 {
  80         if (!btrfs_run_discard_work(discard_ctl))
  81                 return;
  82
  83         if (list_empty(&block_group->discard_list) ||
  84             block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
  85                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
  86                         block_group->discard_index = BTRFS_DISCARD_INDEX_START;
  87                 block_group->discard_eligible_time = (ktime_get_ns() +
  88                                                       BTRFS_DISCARD_DELAY);
  89                 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
  90         }
  91
  92         list_move_tail(&block_group->discard_list,
  93                        get_discard_list(discard_ctl, block_group));
  94 }
  95
  96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  97                                 struct btrfs_block_group *block_group)
  98 {
  99         if (!btrfs_is_block_group_data_only(block_group))
 100                 return;
 101
 102         spin_lock(&discard_ctl->lock);
 103         __add_to_discard_list(discard_ctl, block_group);
 104         spin_unlock(&discard_ctl->lock);
 105 }
 106
 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 108                                        struct btrfs_block_group *block_group)
 109 {
 110         spin_lock(&discard_ctl->lock);
 111
 112         if (!btrfs_run_discard_work(discard_ctl)) {
 113                 spin_unlock(&discard_ctl->lock);
 114                 return;
 115         }
 116
 117         list_del_init(&block_group->discard_list);
 118
 119         block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 120         block_group->discard_eligible_time = (ktime_get_ns() +
 121                                               BTRFS_DISCARD_UNUSED_DELAY);
 122         block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 123         list_add_tail(&block_group->discard_list,
 124                       &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 125
 126         spin_unlock(&discard_ctl->lock);
 127 }
 128
 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
 130                                      struct btrfs_block_group *block_group)
 131 {
 132         bool running = false;
 133
 134         spin_lock(&discard_ctl->lock);
 135
 136         if (block_group == discard_ctl->block_group) {
 137                 running = true;
 138                 discard_ctl->block_group = NULL;
 139         }
 140
 141         block_group->discard_eligible_time = 0;
 142         list_del_init(&block_group->discard_list);
 143
 144         spin_unlock(&discard_ctl->lock);
 145
 146         return running;
 147 }
 148
 149 /**
 150  * find_next_block_group - find block_group that's up next for discarding
 151  * @discard_ctl: discard control
 152  * @now: current time
 153  *
 154  * Iterate over the discard lists to find the next block_group up for
 155  * discarding checking the discard_eligible_time of block_group.
 156  */
 157 static struct btrfs_block_group *find_next_block_group(
 158                                         struct btrfs_discard_ctl *discard_ctl,
 159                                         u64 now)
 160 {
 161         struct btrfs_block_group *ret_block_group = NULL, *block_group;
 162         int i;
 163
 164         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 165                 struct list_head *discard_list = &discard_ctl->discard_list[i];
 166
 167                 if (!list_empty(discard_list)) {
 168                         block_group = list_first_entry(discard_list,
 169                                                        struct btrfs_block_group,
 170                                                        discard_list);
 171
 172                         if (!ret_block_group)
 173                                 ret_block_group = block_group;
 174
 175                         if (ret_block_group->discard_eligible_time < now)
 176                                 break;
 177
 178                         if (ret_block_group->discard_eligible_time >
 179                             block_group->discard_eligible_time)
 180                                 ret_block_group = block_group;
 181                 }
 182         }
 183
 184         return ret_block_group;
 185 }
 186
 187 /**
 188  * Wrap find_next_block_group()
 189  *
 190  * @discard_ctl:   discard control
 191  * @discard_state: the discard_state of the block_group after state management
 192  * @discard_index: the discard_index of the block_group after state management
 193  * @now:           time when discard was invoked, in ns
 194  *
 195  * This wraps find_next_block_group() and sets the block_group to be in use.
 196  * discard_state's control flow is managed here.  Variables related to
 197  * discard_state are reset here as needed (eg discard_cursor).  @discard_state
 198  * and @discard_index are remembered as it may change while we're discarding,
 199  * but we want the discard to execute in the context determined here.
 200  */
 201 static struct btrfs_block_group *peek_discard_list(
 202                                         struct btrfs_discard_ctl *discard_ctl,
 203                                         enum btrfs_discard_state *discard_state,
 204                                         int *discard_index, u64 now)
 205 {
 206         struct btrfs_block_group *block_group;
 207
 208         spin_lock(&discard_ctl->lock);
 209 again:
 210         block_group = find_next_block_group(discard_ctl, now);
 211
 212         if (block_group && now >= block_group->discard_eligible_time) {
 213                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
 214                     block_group->used != 0) {
 215                         if (btrfs_is_block_group_data_only(block_group))
 216                                 __add_to_discard_list(discard_ctl, block_group);
 217                         else
 218                                 list_del_init(&block_group->discard_list);
 219                         goto again;
 220                 }
 221                 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
 222                         block_group->discard_cursor = block_group->start;
 223                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 224                 }
 225                 discard_ctl->block_group = block_group;
 226         }
 227         if (block_group) {
 228                 *discard_state = block_group->discard_state;
 229                 *discard_index = block_group->discard_index;
 230         }
 231         spin_unlock(&discard_ctl->lock);
 232
 233         return block_group;
 234 }
 235
 236 /**
 237  * btrfs_discard_check_filter - updates a block groups filters
 238  * @block_group: block group of interest
 239  * @bytes: recently freed region size after coalescing
 240  *
 241  * Async discard maintains multiple lists with progressively smaller filters
 242  * to prioritize discarding based on size.  Should a free space that matches
 243  * a larger filter be returned to the free_space_cache, prioritize that discard
 244  * by moving @block_group to the proper filter.
 245  */
 246 void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
 247                                 u64 bytes)
 248 {
 249         struct btrfs_discard_ctl *discard_ctl;
 250
 251         if (!block_group ||
 252             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 253                 return;
 254
 255         discard_ctl = &block_group->fs_info->discard_ctl;
 256
 257         if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
 258             bytes >= discard_minlen[block_group->discard_index - 1]) {
 259                 int i;
 260
 261                 remove_from_discard_list(discard_ctl, block_group);
 262
 263                 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
 264                      i++) {
 265                         if (bytes >= discard_minlen[i]) {
 266                                 block_group->discard_index = i;
 267                                 add_to_discard_list(discard_ctl, block_group);
 268                                 break;
 269                         }
 270                 }
 271         }
 272 }
 273
 274 /**
 275  * btrfs_update_discard_index - moves a block group along the discard lists
 276  * @discard_ctl: discard control
 277  * @block_group: block_group of interest
 278  *
 279  * Increment @block_group's discard_index.  If it falls of the list, let it be.
 280  * Otherwise add it back to the appropriate list.
 281  */
 282 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
 283                                        struct btrfs_block_group *block_group)
 284 {
 285         block_group->discard_index++;
 286         if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
 287                 block_group->discard_index = 1;
 288                 return;
 289         }
 290
 291         add_to_discard_list(discard_ctl, block_group);
 292 }
 293
 294 /**
 295  * btrfs_discard_cancel_work - remove a block_group from the discard lists
 296  * @discard_ctl: discard control
 297  * @block_group: block_group of interest
 298  *
 299  * This removes @block_group from the discard lists.  If necessary, it waits on
 300  * the current work and then reschedules the delayed work.
 301  */
 302 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
 303                                struct btrfs_block_group *block_group)
 304 {
 305         if (remove_from_discard_list(discard_ctl, block_group)) {
 306                 cancel_delayed_work_sync(&discard_ctl->work);
 307                 btrfs_discard_schedule_work(discard_ctl, true);
 308         }
 309 }
 310
 311 /**
 312  * btrfs_discard_queue_work - handles queuing the block_groups
 313  * @discard_ctl: discard control
 314  * @block_group: block_group of interest
 315  *
 316  * This maintains the LRU order of the discard lists.
 317  */
 318 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 319                               struct btrfs_block_group *block_group)
 320 {
 321         if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 322                 return;
 323
 324         if (block_group->used == 0)
 325                 add_to_discard_unused_list(discard_ctl, block_group);
 326         else
 327                 add_to_discard_list(discard_ctl, block_group);
 328
 329         if (!delayed_work_pending(&discard_ctl->work))
 330                 btrfs_discard_schedule_work(discard_ctl, false);
 331 }
 332
 333 static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 334                                           u64 now, bool override)
 335 {
 336         struct btrfs_block_group *block_group;
 337
 338         if (!btrfs_run_discard_work(discard_ctl))
 339                 return;
 340         if (!override && delayed_work_pending(&discard_ctl->work))
 341                 return;
 342
 343         block_group = find_next_block_group(discard_ctl, now);
 344         if (block_group) {
 345                 u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
 346                 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
 347
 348                 /*
 349                  * A single delayed workqueue item is responsible for
 350                  * discarding, so we can manage the bytes rate limit by keeping
 351                  * track of the previous discard.
 352                  */
 353                 if (kbps_limit && discard_ctl->prev_discard) {
 354                         u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
 355                         u64 bps_delay = div64_u64(discard_ctl->prev_discard *
 356                                                   NSEC_PER_SEC, bps_limit);
 357
 358                         delay = max(delay, bps_delay);
 359                 }
 360
 361                 /*
 362                  * This timeout is to hopefully prevent immediate discarding
 363                  * in a recently allocated block group.
 364                  */
 365                 if (now < block_group->discard_eligible_time) {
 366                         u64 bg_timeout = block_group->discard_eligible_time - now;
 367
 368                         delay = max(delay, bg_timeout);
 369                 }
 370
 371                 if (override && discard_ctl->prev_discard) {
 372                         u64 elapsed = now - discard_ctl->prev_discard_time;
 373
 374                         if (delay > elapsed)
 375                                 delay -= elapsed;
 376                         else
 377                                 delay = 0;
 378                 }
 379
 380                 mod_delayed_work(discard_ctl->discard_workers,
 381                                  &discard_ctl->work, nsecs_to_jiffies(delay));
 382         }
 383 }
 384
 385 /*
 386  * btrfs_discard_schedule_work - responsible for scheduling the discard work
 387  * @discard_ctl:  discard control
 388  * @override:     override the current timer
 389  *
 390  * Discards are issued by a delayed workqueue item.  @override is used to
 391  * update the current delay as the baseline delay interval is reevaluated on
 392  * transaction commit.  This is also maxed with any other rate limit.
 393  */
 394 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 395                                  bool override)
 396 {
 397         const u64 now = ktime_get_ns();
 398
 399         spin_lock(&discard_ctl->lock);
 400         __btrfs_discard_schedule_work(discard_ctl, now, override);
 401         spin_unlock(&discard_ctl->lock);
 402 }
 403
 404 /**
 405  * btrfs_finish_discard_pass - determine next step of a block_group
 406  * @discard_ctl: discard control
 407  * @block_group: block_group of interest
 408  *
 409  * This determines the next step for a block group after it's finished going
 410  * through a pass on a discard list.  If it is unused and fully trimmed, we can
 411  * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 412  * the appropriate filter list or let it fall off.
 413  */
 414 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
 415                                       struct btrfs_block_group *block_group)
 416 {
 417         remove_from_discard_list(discard_ctl, block_group);
 418
 419         if (block_group->used == 0) {
 420                 if (btrfs_is_free_space_trimmed(block_group))
 421                         btrfs_mark_bg_unused(block_group);
 422                 else
 423                         add_to_discard_unused_list(discard_ctl, block_group);
 424         } else {
 425                 btrfs_update_discard_index(discard_ctl, block_group);
 426         }
 427 }
 428
 429 /**
 430  * btrfs_discard_workfn - discard work function
 431  * @work: work
 432  *
 433  * This finds the next block_group to start discarding and then discards a
 434  * single region.  It does this in a two-pass fashion: first extents and second
 435  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 436  */
 437 static void btrfs_discard_workfn(struct work_struct *work)
 438 {
 439         struct btrfs_discard_ctl *discard_ctl;
 440         struct btrfs_block_group *block_group;
 441         enum btrfs_discard_state discard_state;
 442         int discard_index = 0;
 443         u64 trimmed = 0;
 444         u64 minlen = 0;
 445         u64 now = ktime_get_ns();
 446
 447         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 448
 449         block_group = peek_discard_list(discard_ctl, &discard_state,
 450                                         &discard_index, now);
 451         if (!block_group || !btrfs_run_discard_work(discard_ctl))
 452                 return;
 453         if (now < block_group->discard_eligible_time) {
 454                 btrfs_discard_schedule_work(discard_ctl, false);
 455                 return;
 456         }
 457
 458         /* Perform discarding */
 459         minlen = discard_minlen[discard_index];
 460
 461         if (discard_state == BTRFS_DISCARD_BITMAPS) {
 462                 u64 maxlen = 0;
 463
 464                 /*
 465                  * Use the previous levels minimum discard length as the max
 466                  * length filter.  In the case something is added to make a
 467                  * region go beyond the max filter, the entire bitmap is set
 468                  * back to BTRFS_TRIM_STATE_UNTRIMMED.
 469                  */
 470                 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
 471                         maxlen = discard_minlen[discard_index - 1];
 472
 473                 btrfs_trim_block_group_bitmaps(block_group, &trimmed,
 474                                        block_group->discard_cursor,
 475                                        btrfs_block_group_end(block_group),
 476                                        minlen, maxlen, true);
 477                 discard_ctl->discard_bitmap_bytes += trimmed;
 478         } else {
 479                 btrfs_trim_block_group_extents(block_group, &trimmed,
 480                                        block_group->discard_cursor,
 481                                        btrfs_block_group_end(block_group),
 482                                        minlen, true);
 483                 discard_ctl->discard_extent_bytes += trimmed;
 484         }
 485
 486         /* Determine next steps for a block_group */
 487         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
 488                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
 489                         btrfs_finish_discard_pass(discard_ctl, block_group);
 490                 } else {
 491                         block_group->discard_cursor = block_group->start;
 492                         spin_lock(&discard_ctl->lock);
 493                         if (block_group->discard_state !=
 494                             BTRFS_DISCARD_RESET_CURSOR)
 495                                 block_group->discard_state =
 496                                                         BTRFS_DISCARD_BITMAPS;
 497                         spin_unlock(&discard_ctl->lock);
 498                 }
 499         }
 500
 501         now = ktime_get_ns();
 502         spin_lock(&discard_ctl->lock);
 503         discard_ctl->prev_discard = trimmed;
 504         discard_ctl->prev_discard_time = now;
 505         discard_ctl->block_group = NULL;
 506         __btrfs_discard_schedule_work(discard_ctl, now, false);
 507         spin_unlock(&discard_ctl->lock);
 508 }
 509
 510 /**
 511  * btrfs_run_discard_work - determines if async discard should be running
 512  * @discard_ctl: discard control
 513  *
 514  * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 515  */
 516 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
 517 {
 518         struct btrfs_fs_info *fs_info = container_of(discard_ctl,
 519                                                      struct btrfs_fs_info,
 520                                                      discard_ctl);
 521
 522         return (!(fs_info->sb->s_flags & SB_RDONLY) &&
 523                 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
 524 }
 525
 526 /**
 527  * btrfs_discard_calc_delay - recalculate the base delay
 528  * @discard_ctl: discard control
 529  *
 530  * Recalculate the base delay which is based off the total number of
 531  * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 532  * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 533  */
 534 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 535 {
 536         s32 discardable_extents;
 537         s64 discardable_bytes;
 538         u32 iops_limit;
 539         unsigned long delay;
 540
 541         discardable_extents = atomic_read(&discard_ctl->discardable_extents);
 542         if (!discardable_extents)
 543                 return;
 544
 545         spin_lock(&discard_ctl->lock);
 546
 547         /*
 548          * The following is to fix a potential -1 discrepenancy that we're not
 549          * sure how to reproduce. But given that this is the only place that
 550          * utilizes these numbers and this is only called by from
 551          * btrfs_finish_extent_commit() which is synchronized, we can correct
 552          * here.
 553          */
 554         if (discardable_extents < 0)
 555                 atomic_add(-discardable_extents,
 556                            &discard_ctl->discardable_extents);
 557
 558         discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
 559         if (discardable_bytes < 0)
 560                 atomic64_add(-discardable_bytes,
 561                              &discard_ctl->discardable_bytes);
 562
 563         if (discardable_extents <= 0) {
 564                 spin_unlock(&discard_ctl->lock);
 565                 return;
 566         }
 567
 568         iops_limit = READ_ONCE(discard_ctl->iops_limit);
 569         if (iops_limit)
 570                 delay = MSEC_PER_SEC / iops_limit;
 571         else
 572                 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
 573
 574         delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
 575                       BTRFS_DISCARD_MAX_DELAY_MSEC);
 576         discard_ctl->delay_ms = delay;
 577
 578         spin_unlock(&discard_ctl->lock);
 579 }
 580
 581 /**
 582  * btrfs_discard_update_discardable - propagate discard counters
 583  * @block_group: block_group of interest
 584  *
 585  * This propagates deltas of counters up to the discard_ctl.  It maintains a
 586  * current counter and a previous counter passing the delta up to the global
 587  * stat.  Then the current counter value becomes the previous counter value.
 588  */
 589 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
 590 {
 591         struct btrfs_free_space_ctl *ctl;
 592         struct btrfs_discard_ctl *discard_ctl;
 593         s32 extents_delta;
 594         s64 bytes_delta;
 595
 596         if (!block_group ||
 597             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
 598             !btrfs_is_block_group_data_only(block_group))
 599                 return;
 600
 601         ctl = block_group->free_space_ctl;
 602         discard_ctl = &block_group->fs_info->discard_ctl;
 603
 604         lockdep_assert_held(&ctl->tree_lock);
 605         extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
 606                         ctl->discardable_extents[BTRFS_STAT_PREV];
 607         if (extents_delta) {
 608                 atomic_add(extents_delta, &discard_ctl->discardable_extents);
 609                 ctl->discardable_extents[BTRFS_STAT_PREV] =
 610                         ctl->discardable_extents[BTRFS_STAT_CURR];
 611         }
 612
 613         bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
 614                       ctl->discardable_bytes[BTRFS_STAT_PREV];
 615         if (bytes_delta) {
 616                 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
 617                 ctl->discardable_bytes[BTRFS_STAT_PREV] =
 618                         ctl->discardable_bytes[BTRFS_STAT_CURR];
 619         }
 620 }
 621
 622 /**
 623  * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 624  * @fs_info: fs_info of interest
 625  *
 626  * The unused_bgs list needs to be punted to the discard lists because the
 627  * order of operations is changed.  In the normal synchronous discard path, the
 628  * block groups are trimmed via a single large trim in transaction commit.  This
 629  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 630  * it must be done before going down the unused_bgs path.
 631  */
 632 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
 633 {
 634         struct btrfs_block_group *block_group, *next;
 635
 636         spin_lock(&fs_info->unused_bgs_lock);
 637         /* We enabled async discard, so punt all to the queue */
 638         list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
 639                                  bg_list) {
 640                 list_del_init(&block_group->bg_list);
 641                 btrfs_put_block_group(block_group);
 642                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
 643         }
 644         spin_unlock(&fs_info->unused_bgs_lock);
 645 }
 646
 647 /**
 648  * btrfs_discard_purge_list - purge discard lists
 649  * @discard_ctl: discard control
 650  *
 651  * If we are disabling async discard, we may have intercepted block groups that
 652  * are completely free and ready for the unused_bgs path.  As discarding will
 653  * now happen in transaction commit or not at all, we can safely mark the
 654  * corresponding block groups as unused and they will be sent on their merry
 655  * way to the unused_bgs list.
 656  */
 657 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
 658 {
 659         struct btrfs_block_group *block_group, *next;
 660         int i;
 661
 662         spin_lock(&discard_ctl->lock);
 663         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 664                 list_for_each_entry_safe(block_group, next,
 665                                          &discard_ctl->discard_list[i],
 666                                          discard_list) {
 667                         list_del_init(&block_group->discard_list);
 668                         spin_unlock(&discard_ctl->lock);
 669                         if (block_group->used == 0)
 670                                 btrfs_mark_bg_unused(block_group);
 671                         spin_lock(&discard_ctl->lock);
 672                 }
 673         }
 674         spin_unlock(&discard_ctl->lock);
 675 }
 676
 677 void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 678 {
 679         if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
 680                 btrfs_discard_cleanup(fs_info);
 681                 return;
 682         }
 683
 684         btrfs_discard_punt_unused_bgs_list(fs_info);
 685
 686         set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 687 }
 688
 689 void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
 690 {
 691         clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 692 }
 693
 694 void btrfs_discard_init(struct btrfs_fs_info *fs_info)
 695 {
 696         struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
 697         int i;
 698
 699         spin_lock_init(&discard_ctl->lock);
 700         INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
 701
 702         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
 703                 INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
 704
 705         discard_ctl->prev_discard = 0;
 706         discard_ctl->prev_discard_time = 0;
 707         atomic_set(&discard_ctl->discardable_extents, 0);
 708         atomic64_set(&discard_ctl->discardable_bytes, 0);
 709         discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
 710         discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
 711         discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
 712         discard_ctl->kbps_limit = 0;
 713         discard_ctl->discard_extent_bytes = 0;
 714         discard_ctl->discard_bitmap_bytes = 0;
 715         atomic64_set(&discard_ctl->discard_bytes_saved, 0);
 716 }
 717
 718 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
 719 {
 720         btrfs_discard_stop(fs_info);
 721         cancel_delayed_work_sync(&fs_info->discard_ctl.work);
 722         btrfs_discard_purge_list(&fs_info->discard_ctl);
 723 }