arch/powerpc/mm/book3s64/slb.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * PowerPC64 SLB support.
   4  *
   5  * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
   6  * Based on earlier code written by:
   7  * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
   8  *    Copyright (c) 2001 Dave Engebretsen
   9  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
  10  */
  11
  12 #include <asm/interrupt.h>
  13 #include <asm/mmu.h>
  14 #include <asm/mmu_context.h>
  15 #include <asm/paca.h>
  16 #include <asm/lppaca.h>
  17 #include <asm/ppc-opcode.h>
  18 #include <asm/cputable.h>
  19 #include <asm/cacheflush.h>
  20 #include <asm/smp.h>
  21 #include <linux/compiler.h>
  22 #include <linux/context_tracking.h>
  23 #include <linux/mm_types.h>
  24 #include <linux/pgtable.h>
  25
  26 #include <asm/udbg.h>
  27 #include <asm/code-patching.h>
  28
  29 #include "internal.h"
  30
  31
/*
 * Forward declaration: the preload and context-switch code below calls
 * slb_allocate_user() before its definition further down in this file.
 */
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);

/* Set to true when "stress_slb" is given on the kernel command line. */
bool stress_slb_enabled __initdata;

/*
 * early_param handler for "stress_slb". The option takes no value: the
 * argument string @p is ignored and the flag is simply switched on.
 * Always returns 0.
 */
static int __init parse_stress_slb(char *p)
{
	stress_slb_enabled = true;
	return 0;
}
early_param("stress_slb", parse_stress_slb);

/*
 * Static key, defined in the "false" state.
 * NOTE(review): presumably this is what stress_slb() tests, and it is
 * enabled elsewhere based on stress_slb_enabled - confirm against the
 * header that defines stress_slb().
 */
__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
  44
  45 static void assert_slb_presence(bool present, unsigned long ea)
  46 {
  47 #ifdef CONFIG_DEBUG_VM
  48         unsigned long tmp;
  49
  50         WARN_ON_ONCE(mfmsr() & MSR_EE);
  51
  52         if (!cpu_has_feature(CPU_FTR_ARCH_206))
  53                 return;
  54
  55         /*
  56          * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
  57          * ignores all other bits from 0-27, so just clear them all.
  58          */
  59         ea &= ~((1UL << SID_SHIFT) - 1);
  60         asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
  61
  62         WARN_ON(present == (tmp == 0));
  63 #endif
  64 }
  65
  66 static inline void slb_shadow_update(unsigned long ea, int ssize,
  67                                      unsigned long flags,
  68                                      enum slb_index index)
  69 {
  70         struct slb_shadow *p = get_slb_shadow();
  71
  72         /*
  73          * Clear the ESID first so the entry is not valid while we are
  74          * updating it.  No write barriers are needed here, provided
  75          * we only update the current CPU's SLB shadow buffer.
  76          */
  77         WRITE_ONCE(p->save_area[index].esid, 0);
  78         WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
  79         WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
  80 }
  81
  82 static inline void slb_shadow_clear(enum slb_index index)
  83 {
  84         WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
  85 }
  86
  87 static inline void create_shadowed_slbe(unsigned long ea, int ssize,
  88                                         unsigned long flags,
  89                                         enum slb_index index)
  90 {
  91         /*
  92          * Updating the shadow buffer before writing the SLB ensures
  93          * we don't get a stale entry here if we get preempted by PHYP
  94          * between these two statements.
  95          */
  96         slb_shadow_update(ea, ssize, flags, index);
  97
  98         assert_slb_presence(false, ea);
  99         asm volatile("slbmte  %0,%1" :
 100                      : "r" (mk_vsid_data(ea, ssize, flags)),
 101                        "r" (mk_esid_data(ea, ssize, index))
 102                      : "memory" );
 103 }
 104
 105 /*
 106  * Insert bolted entries into SLB (which may not be empty, so don't clear
 107  * slb_cache_ptr).
 108  */
 109 void __slb_restore_bolted_realmode(void)
 110 {
 111         struct slb_shadow *p = get_slb_shadow();
 112         enum slb_index index;
 113
 114          /* No isync needed because realmode. */
 115         for (index = 0; index < SLB_NUM_BOLTED; index++) {
 116                 asm volatile("slbmte  %0,%1" :
 117                      : "r" (be64_to_cpu(p->save_area[index].vsid)),
 118                        "r" (be64_to_cpu(p->save_area[index].esid)));
 119         }
 120
 121         assert_slb_presence(true, local_paca->kstack);
 122 }
 123
 124 /*
 125  * Insert the bolted entries into an empty SLB.
 126  */
 127 void slb_restore_bolted_realmode(void)
 128 {
 129         __slb_restore_bolted_realmode();
 130         get_paca()->slb_cache_ptr = 0;
 131
 132         get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 133         get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 134 }
 135
/*
 * This flushes all SLB entries including 0, so it must be realmode.
 *
 * Two instructions are issued back to back: an slbmte with both operands
 * zero, followed by slbia. slb_initialize() uses the same pair to
 * "invalidate the entire SLB (even entry 0)".
 * NOTE(review): presumably the slbmte is what takes care of entry 0
 * because slbia alone leaves it in place - confirm against the ISA.
 */
void slb_flush_all_realmode(void)
{
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}
 143
 144 static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
 145 {
 146         struct slb_shadow *p = get_slb_shadow();
 147         unsigned long ksp_esid_data, ksp_vsid_data;
 148         u32 ih;
 149
 150         /*
 151          * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
 152          * information created with Class=0 entries, which we use for kernel
 153          * SLB entries (the SLB entries themselves are still invalidated).
 154          *
 155          * Older processors will ignore this optimisation. Over-invalidation
 156          * is fine because we never rely on lookaside information existing.
 157          */
 158         if (preserve_kernel_lookaside)
 159                 ih = 1;
 160         else
 161                 ih = 0;
 162
 163         ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
 164         ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
 165
 166         asm volatile(PPC_SLBIA(%0)"     \n"
 167                      "slbmte    %1, %2  \n"
 168                      :: "i" (ih),
 169                         "r" (ksp_vsid_data),
 170                         "r" (ksp_esid_data)
 171                      : "memory");
 172 }
 173
 174 /*
 175  * This flushes non-bolted entries, it can be run in virtual mode. Must
 176  * be called with interrupts disabled.
 177  */
 178 void slb_flush_and_restore_bolted(void)
 179 {
 180         BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
 181
 182         WARN_ON(!irqs_disabled());
 183
 184         /*
 185          * We can't take a PMU exception in the following code, so hard
 186          * disable interrupts.
 187          */
 188         hard_irq_disable();
 189
 190         isync();
 191         __slb_flush_and_restore_bolted(false);
 192         isync();
 193
 194         assert_slb_presence(true, get_paca()->kstack);
 195
 196         get_paca()->slb_cache_ptr = 0;
 197
 198         get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 199         get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 200 }
 201
 202 void slb_save_contents(struct slb_entry *slb_ptr)
 203 {
 204         int i;
 205         unsigned long e, v;
 206
 207         /* Save slb_cache_ptr value. */
 208         get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
 209
 210         if (!slb_ptr)
 211                 return;
 212
 213         for (i = 0; i < mmu_slb_size; i++) {
 214                 asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
 215                 asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
 216                 slb_ptr->esid = e;
 217                 slb_ptr->vsid = v;
 218                 slb_ptr++;
 219         }
 220 }
 221
 222 void slb_dump_contents(struct slb_entry *slb_ptr)
 223 {
 224         int i, n;
 225         unsigned long e, v;
 226         unsigned long llp;
 227
 228         if (!slb_ptr)
 229                 return;
 230
 231         pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
 232
 233         for (i = 0; i < mmu_slb_size; i++) {
 234                 e = slb_ptr->esid;
 235                 v = slb_ptr->vsid;
 236                 slb_ptr++;
 237
 238                 if (!e && !v)
 239                         continue;
 240
 241                 pr_err("%02d %016lx %016lx %s\n", i, e, v,
 242                                 (e & SLB_ESID_V) ? "VALID" : "NOT VALID");
 243
 244                 if (!(e & SLB_ESID_V))
 245                         continue;
 246
 247                 llp = v & SLB_VSID_LLP;
 248                 if (v & SLB_VSID_B_1T) {
 249                         pr_err("     1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
 250                                GET_ESID_1T(e),
 251                                (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
 252                 } else {
 253                         pr_err("   256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
 254                                GET_ESID(e),
 255                                (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
 256                 }
 257         }
 258
 259         if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
 260                 /* RR is not so useful as it's often not used for allocation */
 261                 pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);
 262
 263                 /* Dump slb cache entires as well. */
 264                 pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
 265                 pr_err("Valid SLB cache entries:\n");
 266                 n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
 267                 for (i = 0; i < n; i++)
 268                         pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
 269                 pr_err("Rest of SLB cache entries:\n");
 270                 for (i = n; i < SLB_CACHE_ENTRIES; i++)
 271                         pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
 272         }
 273 }
 274
/*
 * Drop any vmalloc translations from the SLB. This is a plain wrapper
 * around slb_flush_and_restore_bolted(), so the same calling rules apply:
 * that function warns if interrupts are not already disabled.
 */
void slb_vmalloc_update(void)
{
	/*
	 * vmalloc is not bolted, so just have to flush non-bolted.
	 */
	slb_flush_and_restore_bolted();
}
 282
 283 static bool preload_hit(struct thread_info *ti, unsigned long esid)
 284 {
 285         unsigned char i;
 286
 287         for (i = 0; i < ti->slb_preload_nr; i++) {
 288                 unsigned char idx;
 289
 290                 idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
 291                 if (esid == ti->slb_preload_esid[idx])
 292                         return true;
 293         }
 294         return false;
 295 }
 296
 297 static bool preload_add(struct thread_info *ti, unsigned long ea)
 298 {
 299         unsigned char idx;
 300         unsigned long esid;
 301
 302         if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 303                 /* EAs are stored >> 28 so 256MB segments don't need clearing */
 304                 if (ea & ESID_MASK_1T)
 305                         ea &= ESID_MASK_1T;
 306         }
 307
 308         esid = ea >> SID_SHIFT;
 309
 310         if (preload_hit(ti, esid))
 311                 return false;
 312
 313         idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
 314         ti->slb_preload_esid[idx] = esid;
 315         if (ti->slb_preload_nr == SLB_PRELOAD_NR)
 316                 ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
 317         else
 318                 ti->slb_preload_nr++;
 319
 320         return true;
 321 }
 322
 323 static void preload_age(struct thread_info *ti)
 324 {
 325         if (!ti->slb_preload_nr)
 326                 return;
 327         ti->slb_preload_nr--;
 328         ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
 329 }
 330
 331 void slb_setup_new_exec(void)
 332 {
 333         struct thread_info *ti = current_thread_info();
 334         struct mm_struct *mm = current->mm;
 335         unsigned long exec = 0x10000000;
 336
 337         WARN_ON(irqs_disabled());
 338
 339         /*
 340          * preload cache can only be used to determine whether a SLB
 341          * entry exists if it does not start to overflow.
 342          */
 343         if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
 344                 return;
 345
 346         hard_irq_disable();
 347
 348         /*
 349          * We have no good place to clear the slb preload cache on exec,
 350          * flush_thread is about the earliest arch hook but that happens
 351          * after we switch to the mm and have already preloaded the SLBEs.
 352          *
 353          * For the most part that's probably okay to use entries from the
 354          * previous exec, they will age out if unused. It may turn out to
 355          * be an advantage to clear the cache before switching to it,
 356          * however.
 357          */
 358
 359         /*
 360          * preload some userspace segments into the SLB.
 361          * Almost all 32 and 64bit PowerPC executables are linked at
 362          * 0x10000000 so it makes sense to preload this segment.
 363          */
 364         if (!is_kernel_addr(exec)) {
 365                 if (preload_add(ti, exec))
 366                         slb_allocate_user(mm, exec);
 367         }
 368
 369         /* Libraries and mmaps. */
 370         if (!is_kernel_addr(mm->mmap_base)) {
 371                 if (preload_add(ti, mm->mmap_base))
 372                         slb_allocate_user(mm, mm->mmap_base);
 373         }
 374
 375         /* see switch_slb */
 376         asm volatile("isync" : : : "memory");
 377
 378         local_irq_enable();
 379 }
 380
 381 void preload_new_slb_context(unsigned long start, unsigned long sp)
 382 {
 383         struct thread_info *ti = current_thread_info();
 384         struct mm_struct *mm = current->mm;
 385         unsigned long heap = mm->start_brk;
 386
 387         WARN_ON(irqs_disabled());
 388
 389         /* see above */
 390         if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
 391                 return;
 392
 393         hard_irq_disable();
 394
 395         /* Userspace entry address. */
 396         if (!is_kernel_addr(start)) {
 397                 if (preload_add(ti, start))
 398                         slb_allocate_user(mm, start);
 399         }
 400
 401         /* Top of stack, grows down. */
 402         if (!is_kernel_addr(sp)) {
 403                 if (preload_add(ti, sp))
 404                         slb_allocate_user(mm, sp);
 405         }
 406
 407         /* Bottom of heap, grows up. */
 408         if (heap && !is_kernel_addr(heap)) {
 409                 if (preload_add(ti, heap))
 410                         slb_allocate_user(mm, heap);
 411         }
 412
 413         /* see switch_slb */
 414         asm volatile("isync" : : : "memory");
 415
 416         local_irq_enable();
 417 }
 418
 419 static void slb_cache_slbie_kernel(unsigned int index)
 420 {
 421         unsigned long slbie_data = get_paca()->slb_cache[index];
 422         unsigned long ksp = get_paca()->kstack;
 423
 424         slbie_data <<= SID_SHIFT;
 425         slbie_data |= 0xc000000000000000ULL;
 426         if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
 427                 return;
 428         slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
 429
 430         asm volatile("slbie %0" : : "r" (slbie_data));
 431 }
 432
 433 static void slb_cache_slbie_user(unsigned int index)
 434 {
 435         unsigned long slbie_data = get_paca()->slb_cache[index];
 436
 437         slbie_data <<= SID_SHIFT;
 438         slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
 439         slbie_data |= SLBIE_C; /* user slbs have C=1 */
 440
 441         asm volatile("slbie %0" : : "r" (slbie_data));
 442 }
 443
 444 /* Flush all user entries from the segment table of the current processor. */
 445 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 446 {
 447         struct thread_info *ti = task_thread_info(tsk);
 448         unsigned char i;
 449
 450         /*
 451          * We need interrupts hard-disabled here, not just soft-disabled,
 452          * so that a PMU interrupt can't occur, which might try to access
 453          * user memory (to get a stack trace) and possible cause an SLB miss
 454          * which would update the slb_cache/slb_cache_ptr fields in the PACA.
 455          */
 456         hard_irq_disable();
 457         isync();
 458         if (stress_slb()) {
 459                 __slb_flush_and_restore_bolted(false);
 460                 isync();
 461                 get_paca()->slb_cache_ptr = 0;
 462                 get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 463
 464         } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 465                 /*
 466                  * SLBIA IH=3 invalidates all Class=1 SLBEs and their
 467                  * associated lookaside structures, which matches what
 468                  * switch_slb wants. So ARCH_300 does not use the slb
 469                  * cache.
 470                  */
 471                 asm volatile(PPC_SLBIA(3));
 472
 473         } else {
 474                 unsigned long offset = get_paca()->slb_cache_ptr;
 475
 476                 if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
 477                     offset <= SLB_CACHE_ENTRIES) {
 478                         /*
 479                          * Could assert_slb_presence(true) here, but
 480                          * hypervisor or machine check could have come
 481                          * in and removed the entry at this point.
 482                          */
 483
 484                         for (i = 0; i < offset; i++)
 485                                 slb_cache_slbie_user(i);
 486
 487                         /* Workaround POWER5 < DD2.1 issue */
 488                         if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
 489                                 slb_cache_slbie_user(0);
 490
 491                 } else {
 492                         /* Flush but retain kernel lookaside information */
 493                         __slb_flush_and_restore_bolted(true);
 494                         isync();
 495
 496                         get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 497                 }
 498
 499                 get_paca()->slb_cache_ptr = 0;
 500         }
 501         get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 502
 503         copy_mm_to_paca(mm);
 504
 505         /*
 506          * We gradually age out SLBs after a number of context switches to
 507          * reduce reload overhead of unused entries (like we do with FP/VEC
 508          * reload). Each time we wrap 256 switches, take an entry out of the
 509          * SLB preload cache.
 510          */
 511         tsk->thread.load_slb++;
 512         if (!tsk->thread.load_slb) {
 513                 unsigned long pc = KSTK_EIP(tsk);
 514
 515                 preload_age(ti);
 516                 preload_add(ti, pc);
 517         }
 518
 519         for (i = 0; i < ti->slb_preload_nr; i++) {
 520                 unsigned char idx;
 521                 unsigned long ea;
 522
 523                 idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
 524                 ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
 525
 526                 slb_allocate_user(mm, ea);
 527         }
 528
 529         /*
 530          * Synchronize slbmte preloads with possible subsequent user memory
 531          * address accesses by the kernel (user mode won't happen until
 532          * rfid, which is safe).
 533          */
 534         isync();
 535 }
 536
/*
 * Record the number of SLB entries. The value is stored in mmu_slb_size,
 * which the code in this file uses as the bound when walking the SLB
 * (save/dump) and when wrapping the round-robin allocator.
 */
void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}
 541
 542 void slb_initialize(void)
 543 {
 544         unsigned long linear_llp, vmalloc_llp, io_llp;
 545         unsigned long lflags;
 546         static int slb_encoding_inited;
 547 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 548         unsigned long vmemmap_llp;
 549 #endif
 550
 551         /* Prepare our SLB miss handler based on our page size */
 552         linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
 553         io_llp = mmu_psize_defs[mmu_io_psize].sllp;
 554         vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
 555         get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
 556 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 557         vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
 558 #endif
 559         if (!slb_encoding_inited) {
 560                 slb_encoding_inited = 1;
 561                 pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
 562                 pr_devel("SLB: io      LLP = %04lx\n", io_llp);
 563 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 564                 pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
 565 #endif
 566         }
 567
 568         get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
 569         get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
 570         get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 571
 572         lflags = SLB_VSID_KERNEL | linear_llp;
 573
 574         /* Invalidate the entire SLB (even entry 0) & all the ERATS */
 575         asm volatile("isync":::"memory");
 576         asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
 577         asm volatile("isync; slbia; isync":::"memory");
 578         create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
 579
 580         /*
 581          * For the boot cpu, we're running on the stack in init_thread_union,
 582          * which is in the first segment of the linear mapping, and also
 583          * get_paca()->kstack hasn't been initialized yet.
 584          * For secondary cpus, we need to bolt the kernel stack entry now.
 585          */
 586         slb_shadow_clear(KSTACK_INDEX);
 587         if (raw_smp_processor_id() != boot_cpuid &&
 588             (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
 589                 create_shadowed_slbe(get_paca()->kstack,
 590                                      mmu_kernel_ssize, lflags, KSTACK_INDEX);
 591
 592         asm volatile("isync":::"memory");
 593 }
 594
 595 static void slb_cache_update(unsigned long esid_data)
 596 {
 597         int slb_cache_index;
 598
 599         if (cpu_has_feature(CPU_FTR_ARCH_300))
 600                 return; /* ISAv3.0B and later does not use slb_cache */
 601
 602         if (stress_slb())
 603                 return;
 604
 605         /*
 606          * Now update slb cache entries
 607          */
 608         slb_cache_index = local_paca->slb_cache_ptr;
 609         if (slb_cache_index < SLB_CACHE_ENTRIES) {
 610                 /*
 611                  * We have space in slb cache for optimized switch_slb().
 612                  * Top 36 bits from esid_data as per ISA
 613                  */
 614                 local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
 615                 local_paca->slb_cache_ptr++;
 616         } else {
 617                 /*
 618                  * Our cache is full and the current cache content strictly
 619                  * doesn't indicate the active SLB contents. Bump the ptr
 620                  * so that switch_slb() will ignore the cache.
 621                  */
 622                 local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
 623         }
 624 }
 625
 626 static enum slb_index alloc_slb_index(bool kernel)
 627 {
 628         enum slb_index index;
 629
 630         /*
 631          * The allocation bitmaps can become out of synch with the SLB
 632          * when the _switch code does slbie when bolting a new stack
 633          * segment and it must not be anywhere else in the SLB. This leaves
 634          * a kernel allocated entry that is unused in the SLB. With very
 635          * large systems or small segment sizes, the bitmaps could slowly
 636          * fill with these entries. They will eventually be cleared out
 637          * by the round robin allocator in that case, so it's probably not
 638          * worth accounting for.
 639          */
 640
 641         /*
 642          * SLBs beyond 32 entries are allocated with stab_rr only
 643          * POWER7/8/9 have 32 SLB entries, this could be expanded if a
 644          * future CPU has more.
 645          */
 646         if (local_paca->slb_used_bitmap != U32_MAX) {
 647                 index = ffz(local_paca->slb_used_bitmap);
 648                 local_paca->slb_used_bitmap |= 1U << index;
 649                 if (kernel)
 650                         local_paca->slb_kern_bitmap |= 1U << index;
 651         } else {
 652                 /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
 653                 index = local_paca->stab_rr;
 654                 if (index < (mmu_slb_size - 1))
 655                         index++;
 656                 else
 657                         index = SLB_NUM_BOLTED;
 658                 local_paca->stab_rr = index;
 659                 if (index < 32) {
 660                         if (kernel)
 661                                 local_paca->slb_kern_bitmap |= 1U << index;
 662                         else
 663                                 local_paca->slb_kern_bitmap &= ~(1U << index);
 664                 }
 665         }
 666         BUG_ON(index < SLB_NUM_BOLTED);
 667
 668         return index;
 669 }
 670
 671 static long slb_insert_entry(unsigned long ea, unsigned long context,
 672                                 unsigned long flags, int ssize, bool kernel)
 673 {
 674         unsigned long vsid;
 675         unsigned long vsid_data, esid_data;
 676         enum slb_index index;
 677
 678         vsid = get_vsid(context, ea, ssize);
 679         if (!vsid)
 680                 return -EFAULT;
 681
 682         /*
 683          * There must not be a kernel SLB fault in alloc_slb_index or before
 684          * slbmte here or the allocation bitmaps could get out of whack with
 685          * the SLB.
 686          *
 687          * User SLB faults or preloads take this path which might get inlined
 688          * into the caller, so add compiler barriers here to ensure unsafe
 689          * memory accesses do not come between.
 690          */
 691         barrier();
 692
 693         index = alloc_slb_index(kernel);
 694
 695         vsid_data = __mk_vsid_data(vsid, ssize, flags);
 696         esid_data = mk_esid_data(ea, ssize, index);
 697
 698         /*
 699          * No need for an isync before or after this slbmte. The exception
 700          * we enter with and the rfid we exit with are context synchronizing.
 701          * User preloads should add isync afterwards in case the kernel
 702          * accesses user memory before it returns to userspace with rfid.
 703          */
 704         assert_slb_presence(false, ea);
 705         if (stress_slb()) {
 706                 int slb_cache_index = local_paca->slb_cache_ptr;
 707
 708                 /*
 709                  * stress_slb() does not use slb cache, repurpose as a
 710                  * cache of inserted (non-bolted) kernel SLB entries. All
 711                  * non-bolted kernel entries are flushed on any user fault,
 712                  * or if there are already 3 non-boled kernel entries.
 713                  */
 714                 BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
 715                 if (!kernel || slb_cache_index == 3) {
 716                         int i;
 717
 718                         for (i = 0; i < slb_cache_index; i++)
 719                                 slb_cache_slbie_kernel(i);
 720                         slb_cache_index = 0;
 721                 }
 722
 723                 if (kernel)
 724                         local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
 725                 local_paca->slb_cache_ptr = slb_cache_index;
 726         }
 727         asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
 728
 729         barrier();
 730
 731         if (!kernel)
 732                 slb_cache_update(esid_data);
 733
 734         return 0;
 735 }
 736
 737 static long slb_allocate_kernel(unsigned long ea, unsigned long id)
 738 {
 739         unsigned long context;
 740         unsigned long flags;
 741         int ssize;
 742
 743         if (id == LINEAR_MAP_REGION_ID) {
 744
 745                 /* We only support upto H_MAX_PHYSMEM_BITS */
 746                 if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
 747                         return -EFAULT;
 748
 749                 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
 750
 751 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 752         } else if (id == VMEMMAP_REGION_ID) {
 753
 754                 if (ea >= H_VMEMMAP_END)
 755                         return -EFAULT;
 756
 757                 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
 758 #endif
 759         } else if (id == VMALLOC_REGION_ID) {
 760
 761                 if (ea >= H_VMALLOC_END)
 762                         return -EFAULT;
 763
 764                 flags = local_paca->vmalloc_sllp;
 765
 766         } else if (id == IO_REGION_ID) {
 767
 768                 if (ea >= H_KERN_IO_END)
 769                         return -EFAULT;
 770
 771                 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
 772
 773         } else {
 774                 return -EFAULT;
 775         }
 776
 777         ssize = MMU_SEGSIZE_1T;
 778         if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
 779                 ssize = MMU_SEGSIZE_256M;
 780
 781         context = get_kernel_context(ea);
 782
 783         return slb_insert_entry(ea, context, flags, ssize, true);
 784 }
 785
 786 static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
 787 {
 788         unsigned long context;
 789         unsigned long flags;
 790         int bpsize;
 791         int ssize;
 792
 793         /*
 794          * consider this as bad access if we take a SLB miss
 795          * on an address above addr limit.
 796          */
 797         if (ea >= mm_ctx_slb_addr_limit(&mm->context))
 798                 return -EFAULT;
 799
 800         context = get_user_context(&mm->context, ea);
 801         if (!context)
 802                 return -EFAULT;
 803
 804         if (unlikely(ea >= H_PGTABLE_RANGE)) {
 805                 WARN_ON(1);
 806                 return -EFAULT;
 807         }
 808
 809         ssize = user_segment_size(ea);
 810
 811         bpsize = get_slice_psize(mm, ea);
 812         flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
 813
 814         return slb_insert_entry(ea, context, flags, ssize, false);
 815 }
 816
 817 DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
 818 {
 819         unsigned long ea = regs->dar;
 820         unsigned long id = get_region_id(ea);
 821
 822         /* IRQs are not reconciled here, so can't check irqs_disabled */
 823         VM_WARN_ON(mfmsr() & MSR_EE);
 824
 825         if (regs_is_unrecoverable(regs))
 826                 return -EINVAL;
 827
 828         /*
 829          * SLB kernel faults must be very careful not to touch anything that is
 830          * not bolted. E.g., PACA and global variables are okay, mm->context
 831          * stuff is not. SLB user faults may access all of memory (and induce
 832          * one recursive SLB kernel fault), so the kernel fault must not
 833          * trample on the user fault state at those points.
 834          */
 835
 836         /*
 837          * This is a raw interrupt handler, for performance, so that
 838          * fast_interrupt_return can be used. The handler must not touch local
 839          * irq state, or schedule. We could test for usermode and upgrade to a
 840          * normal process context (synchronous) interrupt for those, which
 841          * would make them first-class kernel code and able to be traced and
 842          * instrumented, although performance would suffer a bit, it would
 843          * probably be a good tradeoff.
 844          */
 845         if (id >= LINEAR_MAP_REGION_ID) {
 846                 long err;
 847 #ifdef CONFIG_DEBUG_VM
 848                 /* Catch recursive kernel SLB faults. */
 849                 BUG_ON(local_paca->in_kernel_slb_handler);
 850                 local_paca->in_kernel_slb_handler = 1;
 851 #endif
 852                 err = slb_allocate_kernel(ea, id);
 853 #ifdef CONFIG_DEBUG_VM
 854                 local_paca->in_kernel_slb_handler = 0;
 855 #endif
 856                 return err;
 857         } else {
 858                 struct mm_struct *mm = current->mm;
 859                 long err;
 860
 861                 if (unlikely(!mm))
 862                         return -EFAULT;
 863
 864                 err = slb_allocate_user(mm, ea);
 865                 if (!err)
 866                         preload_add(current_thread_info(), ea);
 867
 868                 return err;
 869         }
 870 }