GNU Linux-libre 4.14.254-gnu1
arch/sh/mm/fault.c
/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2012  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

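/*
 * Give any registered kprobes fault handler a chance to claim a
 * kernel-mode fault before the normal page fault handling runs.
 * Returns 1 if a kprobe dealt with the fault, 0 otherwise.
 */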
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
        int ret = 0;

        if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, trap))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}

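/*
 * Deliver a fault signal (SIGSEGV or SIGBUS) to the given task, with
 * si_addr set to the faulting address.
 */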
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
                     struct task_struct *tsk)
{
        siginfo_t info;

        info.si_signo   = si_signo;
        info.si_errno   = 0;
        info.si_code    = si_code;
        info.si_addr    = (void __user *)address;

        force_sig_info(si_signo, &info, tsk);
}

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;

        if (mm) {
                pgd = mm->pgd;
        } else {
                pgd = get_TTB();

                if (unlikely(!pgd))
                        pgd = swapper_pg_dir;
        }

        printk(KERN_ALERT "pgd = %p\n", pgd);
        pgd += pgd_index(addr);
        printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
               (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));

        do {
                pud_t *pud;
                pmd_t *pmd;
                pte_t *pte;

                if (pgd_none(*pgd))
                        break;

                if (pgd_bad(*pgd)) {
                        printk("(bad)");
                        break;
                }

                pud = pud_offset(pgd, addr);
                if (PTRS_PER_PUD != 1)
                        printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
                               (u64)pud_val(*pud));

                if (pud_none(*pud))
                        break;

                if (pud_bad(*pud)) {
                        printk("(bad)");
                        break;
                }

                pmd = pmd_offset(pud, addr);
                if (PTRS_PER_PMD != 1)
                        printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
                               (u64)pmd_val(*pmd));

                if (pmd_none(*pmd))
                        break;

                if (pmd_bad(*pmd)) {
                        printk("(bad)");
                        break;
                }

                /* We must not map this if we have highmem enabled */
                if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
                        break;

                pte = pte_offset_kernel(pmd, addr);
                printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
                       (u64)pte_val(*pte));
        } while (0);

        printk("\n");
}

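/*
 * Copy the kernel mappings for 'address' from the reference page table
 * (init_mm.pgd) into the given pgd. Returns the kernel pmd entry on
 * success, or NULL if the fault cannot be resolved this way.
 */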
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        pgd += index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
                return NULL;

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
                return NULL;

        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;

        if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
        else {
                /*
                 * The page tables are fully synchronised so there must
                 * be another reason for the fault. Return NULL here to
                 * signal that we have not taken care of the fault.
                 */
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
                return NULL;
        }

        return pmd_k;
}

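/*
 * Upper bound of the kernel virtual range handled by vmalloc_fault().
 * When the store queue API is enabled its mappings can fault above
 * VMALLOC_END, so the limit is widened to cover the P3 area.
 */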
#ifdef CONFIG_SH_STORE_QUEUES
#define __FAULT_ADDR_LIMIT      P3_ADDR_MAX
#else
#define __FAULT_ADDR_LIMIT      VMALLOC_END
#endif

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd_k;
        pmd_t *pmd_k;
        pte_t *pte_k;

        /* Make sure we are in vmalloc/module/P3 area: */
        if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
                return -1;

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "current" here. We might be inside
         * an interrupt in the middle of a task switch..
         */
        pgd_k = get_TTB();
        pmd_k = vmalloc_sync_one(pgd_k, address);
        if (!pmd_k)
                return -1;

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;

        return 0;
}

static void
show_fault_oops(struct pt_regs *regs, unsigned long address)
{
        if (!oops_may_print())
                return;

        printk(KERN_ALERT "BUG: unable to handle kernel ");
        if (address < PAGE_SIZE)
                printk(KERN_CONT "NULL pointer dereference");
        else
                printk(KERN_CONT "paging request");

        printk(KERN_CONT " at %08lx\n", address);
        printk(KERN_ALERT "PC:");
        printk_address(regs->pc, 1);

        show_pte(NULL, address);
}

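/*
 * Handle a kernel-mode fault with no user context to blame: try the
 * exception fixup tables and trapped I/O handling first, and oops if
 * neither can resolve the fault.
 */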
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
           unsigned long address)
{
        /* Are we prepared to handle this kernel fault?  */
        if (fixup_exception(regs))
                return;

        if (handle_trapped_io(regs, address))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        bust_spinlocks(1);

        show_fault_oops(regs, address);

        die("Oops", regs, error_code);
        bust_spinlocks(0);
        do_exit(SIGKILL);
}

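/*
 * Signal an invalid access while mmap_sem is not held: user mode
 * accesses get a SIGSEGV, kernel mode accesses fall through to
 * no_context().
 */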
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                       unsigned long address, int si_code)
{
        struct task_struct *tsk = current;

        /* User mode accesses just cause a SIGSEGV */
        if (user_mode(regs)) {
                /*
                 * It's possible to have interrupts off here:
                 */
                local_irq_enable();

                force_sig_info_fault(SIGSEGV, si_code, address, tsk);

                return;
        }

        no_context(regs, error_code, address);
}

static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                     unsigned long address)
{
        __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}

static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
           unsigned long address, int si_code)
{
        struct mm_struct *mm = current->mm;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
        up_read(&mm->mmap_sem);

        __bad_area_nosemaphore(regs, error_code, address, si_code);
}

static noinline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
        __bad_area(regs, error_code, address, SEGV_MAPERR);
}

static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
                      unsigned long address)
{
        __bad_area(regs, error_code, address, SEGV_ACCERR);
}

static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;

        up_read(&mm->mmap_sem);

        /* Kernel mode? Handle exceptions or die: */
        if (!user_mode(regs))
                no_context(regs, error_code, address);

        force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

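/*
 * Handle the error cases of handle_mm_fault(): fatal signals, OOM,
 * SIGBUS and SIGSEGV. Returns 1 if the fault has been fully dealt with
 * here, 0 if the caller should continue with the normal accounting and
 * retry path.
 */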
static noinline int
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
               unsigned long address, unsigned int fault)
{
        /*
         * Pagefault was interrupted by SIGKILL. We have no reason to
         * continue the pagefault.
         */
        if (fatal_signal_pending(current)) {
                if (!(fault & VM_FAULT_RETRY))
                        up_read(&current->mm->mmap_sem);
                if (!user_mode(regs))
                        no_context(regs, error_code, address);
                return 1;
        }

        if (!(fault & VM_FAULT_ERROR))
                return 0;

        if (fault & VM_FAULT_OOM) {
                /* Kernel mode? Handle exceptions or die: */
                if (!user_mode(regs)) {
                        up_read(&current->mm->mmap_sem);
                        no_context(regs, error_code, address);
                        return 1;
                }
                up_read(&current->mm->mmap_sem);

                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed):
                 */
                pagefault_out_of_memory();
        } else {
                if (fault & VM_FAULT_SIGBUS)
                        do_sigbus(regs, error_code, address);
                else if (fault & VM_FAULT_SIGSEGV)
                        bad_area(regs, error_code, address);
                else
                        BUG();
        }

        return 1;
}

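/*
 * Check the faulting access against the vma protections. Returns
 * non-zero if the access is not permitted: a write to a non-writable
 * mapping, an ITLB miss on a non-executable page, or a read from a
 * mapping with no access rights at all.
 */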
static inline int access_error(int error_code, struct vm_area_struct *vma)
{
        if (error_code & FAULT_CODE_WRITE) {
                /* write, present and write, not present: */
                if (unlikely(!(vma->vm_flags & VM_WRITE)))
                        return 1;
                return 0;
        }

        /* ITLB miss on NX page */
        if (unlikely((error_code & FAULT_CODE_ITLB) &&
                     !(vma->vm_flags & VM_EXEC)))
                return 1;

        /* read, not present: */
        if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
                return 1;

        return 0;
}

static int fault_in_kernel_space(unsigned long address)
{
        return address >= TASK_SIZE;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long error_code,
                                        unsigned long address)
{
        unsigned long vec;
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        int fault;
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

        tsk = current;
        mm = tsk->mm;
        vec = lookup_exception_vector();

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely(fault_in_kernel_space(address))) {
                if (vmalloc_fault(address) >= 0)
                        return;
                if (notify_page_fault(regs, vec))
                        return;

                bad_area_nosemaphore(regs, error_code, address);
                return;
        }

        if (unlikely(notify_page_fault(regs, vec)))
                return;

        /* Only enable interrupts if they were on before the fault */
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

        /*
         * If we're in an interrupt, have no user context or are running
         * with pagefaults disabled then we must not take the fault:
         */
        if (unlikely(faulthandler_disabled() || !mm)) {
                bad_area_nosemaphore(regs, error_code, address);
                return;
        }

retry:
        down_read(&mm->mmap_sem);

        vma = find_vma(mm, address);
        if (unlikely(!vma)) {
                bad_area(regs, error_code, address);
                return;
        }
        if (likely(vma->vm_start <= address))
                goto good_area;
        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
                bad_area(regs, error_code, address);
                return;
        }
        if (unlikely(expand_stack(vma, address))) {
                bad_area(regs, error_code, address);
                return;
        }

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        if (unlikely(access_error(error_code, vma))) {
                bad_area_access_error(regs, error_code, address);
                return;
        }

        set_thread_fault_code(error_code);

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
        if (error_code & FAULT_CODE_WRITE)
                flags |= FAULT_FLAG_WRITE;

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(vma, address, flags);

        if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
                if (mm_fault_error(regs, error_code, address, fault))
                        return;

        if (flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                      regs, address);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                      regs, address);
                }
                if (fault & VM_FAULT_RETRY) {
                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
                        flags |= FAULT_FLAG_TRIED;

                        /*
                         * No need to up_read(&mm->mmap_sem) as we would
                         * have already released it in __lock_page_or_retry
                         * in mm/filemap.c.
                         */
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);
}