// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	u8		sleepy; /* 1 if the previous vCPU was preempted or
				 * if the previous node was sleepy */
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode	nodes[MAX_NODES];
};
/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);
static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif
static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}
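/*
 * Worked example (illustrative, not an original comment): if
 * pv_sleepy_lock_interval_ns is set to 1000000 (1ms) via debugfs, then
 * for 1ms after this CPU last saw a preempted owner it keeps treating
 * locks as sleepy, even if the lock word it samples no longer has
 * _Q_SLEEPY_VAL set. A stale timestamp past the interval is reset to 0.
 */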
static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}
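/*
 * Illustrative arithmetic with the defaults above: steal_spins = 32,
 * head_spins = 256 and pv_sleepy_lock_factor = 256, so a paravirt lock
 * marked sleepy allows up to 32 * 256 = 8192 steal iterations and
 * 256 * 256 = 65536 head iterations before _Q_MUST_Q_VAL is set; the
 * larger budgets tolerate vCPU preemption before forcing FIFO order.
 */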
static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}
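/*
 * Worked example (illustrative; the real field offsets live in
 * asm/qspinlock_types.h): the tail stores "cpu + 1" so that a tail field
 * of 0 means "no queue". If _Q_TAIL_CPU_OFFSET were 16:
 *
 *	encode_tail_cpu(0) == 0x00010000;	// CPU 0 is still non-zero
 *	decode_tail_cpu(0x00010000) == 0;	// and round-trips back
 *
 * so (val & _Q_TAIL_CPU_MASK) == 0 reliably tests for an empty queue.
 */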
/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
	/* This test is necessary if there could be stealers */
" andi. %1,%0,%5 \n"
" bne 3f \n"
	/* Test whether the lock tail == mytail */
" and %1,%0,%6 \n"
" cmpw 0,%1,%3 \n"
	/* Merge the new locked value */
" or %1,%1,%4 \n"
" bne 2f \n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
" andc %1,%1,%6 \n"
"2: stwcx. %1,0,%2 \n"
" bne- 1b \n"
"\t" PPC_ACQUIRE_BARRIER " \n"
"3: \n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}
/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t" PPC_RELEASE_BARRIER " \n"
"1: lwarx %0,0,%2 # publish_tail_cpu \n"
" andc %1,%0,%4 \n"
" or %1,%1,%3 \n"
" stwcx. %1,0,%2 \n"
" bne- 1b \n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1: lwarx %0,0,%1 # set_mustq \n"
" or %0,%0,%2 \n"
" stwcx. %0,0,%1 \n"
" bne- 1b \n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1: lwarx %0,0,%1 # clear_mustq \n"
" andc %0,%0,%2 \n"
" stwcx. %0,0,%1 \n"
" bne- 1b \n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1: lwarx %0,0,%1 # try_set_sleepy \n"
" cmpw 0,%0,%2 \n"
" bne- 2f \n"
" stwcx. %3,0,%1 \n"
" bne- 1b \n"
"2: \n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}
static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(void)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		/* Don't set sleepy because we likely have a stale val */
	}
}
static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}
/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb, because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS; however, this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}
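/*
 * Note on the yield_count convention relied on above (not an original
 * comment): the hypervisor maintains a per-vCPU count that is even while
 * the vCPU runs, so the "& 1" test only yields when an odd (preempted)
 * count was sampled. Passing that sampled count to yield_to_preempted()
 * lets the hypervisor treat the request as stale if the owner has since
 * been dispatched again.
 */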
/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}
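/*
 * The queue head sets _Q_MUST_Q_VAL to stop stealers; when
 * pv_yield_allow_steal is enabled, __yield_to_locked_owner() drops that
 * bit (clear_mustq) for the duration of the yield so the lock can still
 * be taken while this vCPU is not running, and restores it afterwards.
 */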
static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_sleepy_owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (next->sleepy)
		return;

	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->sleepy = 1;
}
/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
	u32 yield_count;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_sleepy_owner)
		goto yield_prev;

	/*
	 * If the previous waiter was preempted it might not be able to
	 * propagate sleepy to us, so check the lock in that case too.
	 */
	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
		u32 val = READ_ONCE(lock->val);

		if (val & _Q_LOCKED_VAL) {
			if (node->next && !node->next->sleepy) {
				/*
				 * Propagate sleepy to next waiter. Only if
				 * owner is preempted, which allows the queue
				 * to become "non-sleepy" if vCPU preemption
				 * ceases to occur, even if the lock remains
				 * contended.
				 */
				if (vcpu_is_preempted(get_owner_cpu(val)))
					node->next->sleepy = 1;
			}

			preempted = yield_to_locked_owner(lock, val, paravirt);
			if (preempted)
				return preempted;
		}
		node->sleepy = false;
	}

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* prev vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node();

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}

	return false;
}
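/*
 * Illustrative reading of the checks above (not an original comment):
 * with the defaults (steal_spins = 32, remote_steal_spins = 4), stealing
 * is abandoned after 4 iterations when the owner sits on a remote NUMA
 * node but tolerated for up to 32 when it is local, presumably because
 * remote cacheline bouncing makes queueing attractive sooner.
 */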
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * With pv_spin_on_preempted_owner, don't increase
			 * iters while the owner is preempted -- we won't
			 * interfere with it by definition. This could
			 * introduce some latency issue if we continually
			 * observe preempted owners, but hopefully that's a
			 * rare corner case of a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler reorders these stores, an IRQ
	 * could overwrite our assignments.
	 */
	barrier();

	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->sleepy = 0;
	node->locked = 0;
	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node, link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		int prev_cpu = decode_tail_cpu(old);
		struct qnode *prev = get_tail_qnode(lock, prev_cpu);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, prev_cpu, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}
	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_sleepy(node, val, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();
	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();
	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;

		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	qnodesp->count--; /* release the node */
}
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
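/*
 * Illustrative sketch of the constant-propagation trick described above:
 * because `paravirt` is a literal constant at each call site and the
 * callees are __always_inline, the compiler emits two independent
 * specializations; in the false case every `if (paravirt && ...)` test
 * folds away, as if the paravirt-only code were deleted from the build.
 */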
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
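/*
 * A plausible reading of the synchronization above (not an original
 * comment): the lock slowpath runs with preemption disabled, which
 * synchronize_rcu() treats as a read-side critical section, so each
 * synchronize_rcu() waits until no CPU can still be executing a slowpath
 * that observed the old maybe_stealers value.
 */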
static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
	pv_yield_sleepy_owner = !!val;

	return 0;
}

static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
	*val = pv_yield_sleepy_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);
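/*
 * Usage example (assuming debugfs is mounted and arch_debugfs_dir is the
 * usual /sys/kernel/debug/powerpc):
 *
 *	# cat /sys/kernel/debug/powerpc/qspl_steal_spins
 *	32
 *	# echo 0 > /sys/kernel/debug/powerpc/qspl_steal_spins
 *
 * which disables lock stealing (waiters always queue), subject to the
 * _Q_SPIN_TRY_LOCK_STEAL build-time setting handled in steal_spins_set().
 */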