fs/btrfs/locking.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2008 Oracle.  All rights reserved.
   4  */
   5
   6 #include <linux/sched.h>
   7 #include <linux/pagemap.h>
   8 #include <linux/spinlock.h>
   9 #include <linux/page-flags.h>
  10 #include <asm/bug.h>
  11 #include "misc.h"
  12 #include "ctree.h"
  13 #include "extent_io.h"
  14 #include "locking.h"
  15
  16 /*
  17  * Extent buffer locking
  18  * =====================
  19  *
  20  * We use a rw_semaphore for tree locking, and the semantics are exactly the
  21  * same:
  22  *
  23  * - reader/writer exclusion
  24  * - writer/writer exclusion
  25  * - reader/reader sharing
  26  * - try-lock semantics for readers and writers
  27  *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
  30  */
  31
  32 /*
  33  * __btrfs_tree_read_lock - lock extent buffer for read
  34  * @eb:         the eb to be locked
  35  * @nest:       the nesting level to be used for lockdep
  36  *
  37  * This takes the read lock on the extent buffer, using the specified nesting
  38  * level for lockdep purposes.
  39  */
  40 void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
  41 {
  42         u64 start_ns = 0;
  43
  44         if (trace_btrfs_tree_read_lock_enabled())
  45                 start_ns = ktime_get_ns();
  46
  47         down_read_nested(&eb->lock, nest);
  48         trace_btrfs_tree_read_lock(eb, start_ns);
  49 }
  50
/*
 * Lock an extent buffer for read using the default lockdep nesting level
 * (BTRFS_NESTING_NORMAL).
 */
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
        __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}
  55
  56 /*
  57  * Try-lock for read.
  58  *
  59  * Return 1 if the rwlock has been taken, 0 otherwise
  60  */
  61 int btrfs_try_tree_read_lock(struct extent_buffer *eb)
  62 {
  63         if (down_read_trylock(&eb->lock)) {
  64                 trace_btrfs_try_tree_read_lock(eb);
  65                 return 1;
  66         }
  67         return 0;
  68 }
  69
  70 /*
  71  * Try-lock for write.
  72  *
  73  * Return 1 if the rwlock has been taken, 0 otherwise
  74  */
  75 int btrfs_try_tree_write_lock(struct extent_buffer *eb)
  76 {
  77         if (down_write_trylock(&eb->lock)) {
  78                 eb->lock_owner = current->pid;
  79                 trace_btrfs_try_tree_write_lock(eb);
  80                 return 1;
  81         }
  82         return 0;
  83 }
  84
/*
 * Release read lock.
 *
 * The btrfs_tree_read_unlock trace event is emitted before the rwsem is
 * released.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_read_unlock(eb);
        up_read(&eb->lock);
}
  93
  94 /*
  95  * __btrfs_tree_lock - lock eb for write
  96  * @eb:         the eb to lock
  97  * @nest:       the nesting to use for the lock
  98  *
  99  * Returns with the eb->lock write locked.
 100  */
 101 void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 102         __acquires(&eb->lock)
 103 {
 104         u64 start_ns = 0;
 105
 106         if (trace_btrfs_tree_lock_enabled())
 107                 start_ns = ktime_get_ns();
 108
 109         down_write_nested(&eb->lock, nest);
 110         eb->lock_owner = current->pid;
 111         trace_btrfs_tree_lock(eb, start_ns);
 112 }
 113
/*
 * Lock an extent buffer for write using the default lockdep nesting level
 * (BTRFS_NESTING_NORMAL).
 */
void btrfs_tree_lock(struct extent_buffer *eb)
{
        __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}
 118
/*
 * Release the write lock.
 *
 * The btrfs_tree_unlock trace event is emitted and eb->lock_owner is reset to
 * 0 before the rwsem is released, i.e. while the write lock is still held.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_unlock(eb);
        eb->lock_owner = 0;
        up_write(&eb->lock);
}
 128
 129 /*
 130  * This releases any locks held in the path starting at level and going all the
 131  * way up to the root.
 132  *
 133  * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 134  * cases, such as COW of the block at slot zero in the node.  This ignores
 135  * those rules, and it should only be called when there are no more updates to
 136  * be done higher up in the tree.
 137  */
 138 void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 139 {
 140         int i;
 141
 142         if (path->keep_locks)
 143                 return;
 144
 145         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
 146                 if (!path->nodes[i])
 147                         continue;
 148                 if (!path->locks[i])
 149                         continue;
 150                 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 151                 path->locks[i] = 0;
 152         }
 153 }
 154
 155 /*
 156  * Loop around taking references on and locking the root node of the tree until
 157  * we end up with a lock on the root node.
 158  *
 159  * Return: root extent buffer with write lock held
 160  */
 161 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 162 {
 163         struct extent_buffer *eb;
 164
 165         while (1) {
 166                 eb = btrfs_root_node(root);
 167                 btrfs_tree_lock(eb);
 168                 if (eb == root->node)
 169                         break;
 170                 btrfs_tree_unlock(eb);
 171                 free_extent_buffer(eb);
 172         }
 173         return eb;
 174 }
 175
 176 /*
 177  * Loop around taking references on and locking the root node of the tree until
 178  * we end up with a lock on the root node.
 179  *
 180  * Return: root extent buffer with read lock held
 181  */
 182 struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
 183 {
 184         struct extent_buffer *eb;
 185
 186         while (1) {
 187                 eb = btrfs_root_node(root);
 188                 btrfs_tree_read_lock(eb);
 189                 if (eb == root->node)
 190                         break;
 191                 btrfs_tree_read_unlock(eb);
 192                 free_extent_buffer(eb);
 193         }
 194         return eb;
 195 }
 196
 197 /*
 198  * DREW locks
 199  * ==========
 200  *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not A-A or B-B.
 203  *
 * The current implementation gives more priority to readers. If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 209  */
 210
 211 int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
 212 {
 213         int ret;
 214
 215         ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
 216         if (ret)
 217                 return ret;
 218
 219         atomic_set(&lock->readers, 0);
 220         init_waitqueue_head(&lock->pending_readers);
 221         init_waitqueue_head(&lock->pending_writers);
 222
 223         return 0;
 224 }
 225
/*
 * Tear down a DREW lock by destroying its per-cpu writers counter, the
 * counterpart of the percpu_counter_init() done in btrfs_drew_lock_init().
 */
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
        percpu_counter_destroy(&lock->writers);
}
 230
/*
 * Try to take the writer side of a DREW lock without sleeping.
 *
 * Return true if acquisition is successful, false otherwise
 */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
        /* Readers are already present: bail out without touching the counter */
        if (atomic_read(&lock->readers))
                return false;

        percpu_counter_inc(&lock->writers);

        /* Ensure writers count is updated before we check for pending readers */
        smp_mb();
        if (atomic_read(&lock->readers)) {
                /* A reader showed up in the meantime: back out our increment */
                btrfs_drew_write_unlock(lock);
                return false;
        }

        return true;
}
 248
 249 void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
 250 {
 251         while (true) {
 252                 if (btrfs_drew_try_write_lock(lock))
 253                         return;
 254                 wait_event(lock->pending_writers, !atomic_read(&lock->readers));
 255         }
 256 }
 257
/*
 * Release the writer side of a DREW lock: drop this writer from the writers
 * counter, then call cond_wake_up() on the pending_readers wait queue.
 */
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
        percpu_counter_dec(&lock->writers);
        /*
         * NOTE(review): cond_wake_up() is defined outside this file; it
         * presumably only issues the wake-up when the queue has waiters and
         * provides the needed barrier - confirm against misc.h.
         */
        cond_wake_up(&lock->pending_readers);
}
 263
/*
 * Take the reader side of a DREW lock.
 *
 * The readers count is raised first, then the caller sleeps on the
 * pending_readers wait queue until the summed writers counter is zero.
 */
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
        atomic_inc(&lock->readers);

        /*
         * Ensure the pending reader count is perceived BEFORE this reader
         * goes to sleep in case of active writers. This guarantees new writers
         * won't be allowed and that the current reader will be woken up when
         * the last active writer finishes its jobs.
         */
        smp_mb__after_atomic();

        wait_event(lock->pending_readers,
                   percpu_counter_sum(&lock->writers) == 0);
}
 279
/*
 * Release the reader side of a DREW lock.  Only the reader that brings the
 * readers count down to zero wakes the pending_writers wait queue.
 */
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
        /*
         * atomic_dec_and_test implies a full barrier, so woken up writers
         * are guaranteed to see the decrement
         */
        if (atomic_dec_and_test(&lock->readers))
                wake_up(&lock->pending_writers);
}