fs/btrfs/locking.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2008 Oracle.  All rights reserved.
   4  */
   5
   6 #include <linux/sched.h>
   7 #include <linux/pagemap.h>
   8 #include <linux/spinlock.h>
   9 #include <linux/page-flags.h>
  10 #include <asm/bug.h>
  11 #include <trace/events/btrfs.h>
  12 #include "misc.h"
  13 #include "ctree.h"
  14 #include "extent_io.h"
  15 #include "locking.h"
  16 #include "accessors.h"
  17
  18 /*
  19  * Lockdep class keys for extent_buffer->lock's in this root.  For a given
  20  * eb, the lockdep key is determined by the btrfs_root it belongs to and
  21  * the level the eb occupies in the tree.
  22  *
  23  * Different roots are used for different purposes and may nest inside each
  24  * other and they require separate keysets.  As lockdep keys should be
  25  * static, assign keysets according to the purpose of the root as indicated
  26  * by btrfs_root->root_key.objectid.  This ensures that all special purpose
  27  * roots have separate keysets.
  28  *
  29  * Lock-nesting across peer nodes is always done with the immediate parent
  30  * node locked thus preventing deadlock.  As lockdep doesn't know this, use
  31  * subclass to avoid triggering lockdep warning in such cases.
  32  *
  33  * The key is set by the readpage_end_io_hook after the buffer has passed
  34  * csum validation but before the pages are unlocked.  It is also set by
  35  * btrfs_init_new_buffer on freshly allocated blocks.
  36  *
  37  * We also add a check to make sure the highest level of the tree is the
  38  * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
  39  * needs update as well.
  40  */
  41 #ifdef CONFIG_DEBUG_LOCK_ALLOC
  42 #if BTRFS_MAX_LEVEL != 8
  43 #error
  44 #endif
  45
  46 #define DEFINE_LEVEL(stem, level)                                       \
  47         .names[level] = "btrfs-" stem "-0" #level,
  48
  49 #define DEFINE_NAME(stem)                                               \
  50         DEFINE_LEVEL(stem, 0)                                           \
  51         DEFINE_LEVEL(stem, 1)                                           \
  52         DEFINE_LEVEL(stem, 2)                                           \
  53         DEFINE_LEVEL(stem, 3)                                           \
  54         DEFINE_LEVEL(stem, 4)                                           \
  55         DEFINE_LEVEL(stem, 5)                                           \
  56         DEFINE_LEVEL(stem, 6)                                           \
  57         DEFINE_LEVEL(stem, 7)
  58
  59 static struct btrfs_lockdep_keyset {
  60         u64                     id;             /* root objectid */
  61         /* Longest entry: btrfs-block-group-00 */
  62         char                    names[BTRFS_MAX_LEVEL][24];
  63         struct lock_class_key   keys[BTRFS_MAX_LEVEL];
  64 } btrfs_lockdep_keysets[] = {
  65         { .id = BTRFS_ROOT_TREE_OBJECTID,       DEFINE_NAME("root")     },
  66         { .id = BTRFS_EXTENT_TREE_OBJECTID,     DEFINE_NAME("extent")   },
  67         { .id = BTRFS_CHUNK_TREE_OBJECTID,      DEFINE_NAME("chunk")    },
  68         { .id = BTRFS_DEV_TREE_OBJECTID,        DEFINE_NAME("dev")      },
  69         { .id = BTRFS_CSUM_TREE_OBJECTID,       DEFINE_NAME("csum")     },
  70         { .id = BTRFS_QUOTA_TREE_OBJECTID,      DEFINE_NAME("quota")    },
  71         { .id = BTRFS_TREE_LOG_OBJECTID,        DEFINE_NAME("log")      },
  72         { .id = BTRFS_TREE_RELOC_OBJECTID,      DEFINE_NAME("treloc")   },
  73         { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc")   },
  74         { .id = BTRFS_UUID_TREE_OBJECTID,       DEFINE_NAME("uuid")     },
  75         { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
  76         { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
  77         { .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
  78         { .id = 0,                              DEFINE_NAME("tree")     },
  79 };
  80
  81 #undef DEFINE_LEVEL
  82 #undef DEFINE_NAME
  83
  84 void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
  85 {
  86         struct btrfs_lockdep_keyset *ks;
  87
  88         BUG_ON(level >= ARRAY_SIZE(ks->keys));
  89
  90         /* Find the matching keyset, id 0 is the default entry */
  91         for (ks = btrfs_lockdep_keysets; ks->id; ks++)
  92                 if (ks->id == objectid)
  93                         break;
  94
  95         lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
  96 }
  97
  98 void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
  99 {
 100         if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
 101                 btrfs_set_buffer_lockdep_class(root->root_key.objectid,
 102                                                eb, btrfs_header_level(eb));
 103 }
 104
 105 #endif
 106
 107 #ifdef CONFIG_BTRFS_DEBUG
 108 static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
 109 {
 110         eb->lock_owner = owner;
 111 }
 112 #else
 113 static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
 114 #endif
 115
 116 /*
 117  * Extent buffer locking
 118  * =====================
 119  *
 120  * We use a rw_semaphore for tree locking, and the semantics are exactly the
 121  * same:
 122  *
 123  * - reader/writer exclusion
 124  * - writer/writer exclusion
 125  * - reader/reader sharing
 126  * - try-lock semantics for readers and writers
 127  *
 128  * The rwsem implementation does opportunistic spinning which reduces number of
 129  * times the locking task needs to sleep.
 130  */
 131
 132 /*
 133  * __btrfs_tree_read_lock - lock extent buffer for read
 134  * @eb:         the eb to be locked
 135  * @nest:       the nesting level to be used for lockdep
 136  *
 137  * This takes the read lock on the extent buffer, using the specified nesting
 138  * level for lockdep purposes.
 139  */
 140 void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 141 {
 142         u64 start_ns = 0;
 143
 144         if (trace_btrfs_tree_read_lock_enabled())
 145                 start_ns = ktime_get_ns();
 146
 147         down_read_nested(&eb->lock, nest);
 148         trace_btrfs_tree_read_lock(eb, start_ns);
 149 }
 150
 151 void btrfs_tree_read_lock(struct extent_buffer *eb)
 152 {
 153         __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
 154 }
 155
 156 /*
 157  * Try-lock for read.
 158  *
 159  * Return 1 if the rwlock has been taken, 0 otherwise
 160  */
 161 int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 162 {
 163         if (down_read_trylock(&eb->lock)) {
 164                 trace_btrfs_try_tree_read_lock(eb);
 165                 return 1;
 166         }
 167         return 0;
 168 }
 169
 170 /*
 171  * Try-lock for write.
 172  *
 173  * Return 1 if the rwlock has been taken, 0 otherwise
 174  */
 175 int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 176 {
 177         if (down_write_trylock(&eb->lock)) {
 178                 btrfs_set_eb_lock_owner(eb, current->pid);
 179                 trace_btrfs_try_tree_write_lock(eb);
 180                 return 1;
 181         }
 182         return 0;
 183 }
 184
 185 /*
 186  * Release read lock.
 187  */
 188 void btrfs_tree_read_unlock(struct extent_buffer *eb)
 189 {
 190         trace_btrfs_tree_read_unlock(eb);
 191         up_read(&eb->lock);
 192 }
 193
 194 /*
 195  * Lock eb for write.
 196  *
 197  * @eb:         the eb to lock
 198  * @nest:       the nesting to use for the lock
 199  *
 200  * Returns with the eb->lock write locked.
 201  */
 202 void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 203         __acquires(&eb->lock)
 204 {
 205         u64 start_ns = 0;
 206
 207         if (trace_btrfs_tree_lock_enabled())
 208                 start_ns = ktime_get_ns();
 209
 210         down_write_nested(&eb->lock, nest);
 211         btrfs_set_eb_lock_owner(eb, current->pid);
 212         trace_btrfs_tree_lock(eb, start_ns);
 213 }
 214
 215 void btrfs_tree_lock(struct extent_buffer *eb)
 216 {
 217         __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
 218 }
 219
 220 /*
 221  * Release the write lock.
 222  */
 223 void btrfs_tree_unlock(struct extent_buffer *eb)
 224 {
 225         trace_btrfs_tree_unlock(eb);
 226         btrfs_set_eb_lock_owner(eb, 0);
 227         up_write(&eb->lock);
 228 }
 229
 230 /*
 231  * This releases any locks held in the path starting at level and going all the
 232  * way up to the root.
 233  *
 234  * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 235  * cases, such as COW of the block at slot zero in the node.  This ignores
 236  * those rules, and it should only be called when there are no more updates to
 237  * be done higher up in the tree.
 238  */
 239 void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 240 {
 241         int i;
 242
 243         if (path->keep_locks)
 244                 return;
 245
 246         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
 247                 if (!path->nodes[i])
 248                         continue;
 249                 if (!path->locks[i])
 250                         continue;
 251                 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 252                 path->locks[i] = 0;
 253         }
 254 }
 255
 256 /*
 257  * Loop around taking references on and locking the root node of the tree until
 258  * we end up with a lock on the root node.
 259  *
 260  * Return: root extent buffer with write lock held
 261  */
 262 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 263 {
 264         struct extent_buffer *eb;
 265
 266         while (1) {
 267                 eb = btrfs_root_node(root);
 268
 269                 btrfs_maybe_reset_lockdep_class(root, eb);
 270                 btrfs_tree_lock(eb);
 271                 if (eb == root->node)
 272                         break;
 273                 btrfs_tree_unlock(eb);
 274                 free_extent_buffer(eb);
 275         }
 276         return eb;
 277 }
 278
 279 /*
 280  * Loop around taking references on and locking the root node of the tree until
 281  * we end up with a lock on the root node.
 282  *
 283  * Return: root extent buffer with read lock held
 284  */
 285 struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
 286 {
 287         struct extent_buffer *eb;
 288
 289         while (1) {
 290                 eb = btrfs_root_node(root);
 291
 292                 btrfs_maybe_reset_lockdep_class(root, eb);
 293                 btrfs_tree_read_lock(eb);
 294                 if (eb == root->node)
 295                         break;
 296                 btrfs_tree_read_unlock(eb);
 297                 free_extent_buffer(eb);
 298         }
 299         return eb;
 300 }
 301
 302 /*
 303  * Loop around taking references on and locking the root node of the tree in
 304  * nowait mode until we end up with a lock on the root node or returning to
 305  * avoid blocking.
 306  *
 307  * Return: root extent buffer with read lock held or -EAGAIN.
 308  */
 309 struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
 310 {
 311         struct extent_buffer *eb;
 312
 313         while (1) {
 314                 eb = btrfs_root_node(root);
 315                 if (!btrfs_try_tree_read_lock(eb)) {
 316                         free_extent_buffer(eb);
 317                         return ERR_PTR(-EAGAIN);
 318                 }
 319                 if (eb == root->node)
 320                         break;
 321                 btrfs_tree_read_unlock(eb);
 322                 free_extent_buffer(eb);
 323         }
 324         return eb;
 325 }
 326
 327 /*
 328  * DREW locks
 329  * ==========
 330  *
 331  * DREW stands for double-reader-writer-exclusion lock. It's used in situation
 332  * where you want to provide A-B exclusion but not AA or BB.
 333  *
 334  * Currently implementation gives more priority to reader. If a reader and a
 335  * writer both race to acquire their respective sides of the lock the writer
 336  * would yield its lock as soon as it detects a concurrent reader. Additionally
 337  * if there are pending readers no new writers would be allowed to come in and
 338  * acquire the lock.
 339  */
 340
 341 void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
 342 {
 343         atomic_set(&lock->readers, 0);
 344         atomic_set(&lock->writers, 0);
 345         init_waitqueue_head(&lock->pending_readers);
 346         init_waitqueue_head(&lock->pending_writers);
 347 }
 348
 349 /* Return true if acquisition is successful, false otherwise */
 350 bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
 351 {
 352         if (atomic_read(&lock->readers))
 353                 return false;
 354
 355         atomic_inc(&lock->writers);
 356
 357         /* Ensure writers count is updated before we check for pending readers */
 358         smp_mb__after_atomic();
 359         if (atomic_read(&lock->readers)) {
 360                 btrfs_drew_write_unlock(lock);
 361                 return false;
 362         }
 363
 364         return true;
 365 }
 366
 367 void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
 368 {
 369         while (true) {
 370                 if (btrfs_drew_try_write_lock(lock))
 371                         return;
 372                 wait_event(lock->pending_writers, !atomic_read(&lock->readers));
 373         }
 374 }
 375
 376 void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
 377 {
 378         atomic_dec(&lock->writers);
 379         cond_wake_up(&lock->pending_readers);
 380 }
 381
 382 void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
 383 {
 384         atomic_inc(&lock->readers);
 385
 386         /*
 387          * Ensure the pending reader count is perceieved BEFORE this reader
 388          * goes to sleep in case of active writers. This guarantees new writers
 389          * won't be allowed and that the current reader will be woken up when
 390          * the last active writer finishes its jobs.
 391          */
 392         smp_mb__after_atomic();
 393
 394         wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
 395 }
 396
 397 void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
 398 {
 399         /*
 400          * atomic_dec_and_test implies a full barrier, so woken up writers
 401          * are guaranteed to see the decrement
 402          */
 403         if (atomic_dec_and_test(&lock->readers))
 404                 wake_up(&lock->pending_writers);
 405 }