// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
#include <linux/static_call.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

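/*
 * Recover the original instruction bytes at @addr when they have been
 * overwritten by an optimized kprobe's JMP32. Returns the address of @buf
 * holding the recovered bytes, @addr itself if no optimized kprobe covers
 * it, or 0 if the kernel text could not be read.
 */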
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < JMP32_INSN_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op->list is not empty, op is still being optimized */
                        if (list_empty(&op->list))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe has been optimized, the original bytes may have been
         * overwritten by the jump destination address. In that case, the
         * original bytes must be recovered from the op->optinsn.copied_insn
         * buffer.
         */
        if (copy_from_kernel_nofault(buf, (void *)addr,
                MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                return 0UL;

        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
        } else {
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
        }

        return (unsigned long)buf;
}

static void synthesize_clac(kprobe_opcode_t *addr)
{
        /*
         * Can't be static_cpu_has() due to how objtool treats this feature bit.
         * This isn't a fast path anyway.
         */
        if (!boot_cpu_has(X86_FEATURE_SMAP))
                return;

        /* Replace the NOP3 with CLAC (0f 01 ca) */
        addr[0] = 0x0f;
        addr[1] = 0x01;
        addr[2] = 0xca;
}

/* Insert a move instruction which loads a pointer into eax/rdi (1st argument). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
        *addr++ = 0x48;         /* REX.W prefix */
        *addr++ = 0xbf;         /* movabs $imm64, %rdi */
#else
        *addr++ = 0xb8;         /* mov $imm32, %eax */
#endif
        *(unsigned long *)addr = val;
}

asm (
                        ".pushsection .rodata\n"
                        "optprobe_template_func:\n"
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        "       pushq $" __stringify(__KERNEL_DS) "\n"
                        /* Save the 'sp - 8', this will be fixed later. */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        ".global optprobe_template_clac\n"
                        "optprobe_template_clac:\n"
                        ASM_NOP3
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Copy 'regs->flags' into 'regs->ss'. */
                        "       movq 18*8(%rsp), %rdx\n"
                        "       movq %rdx, 20*8(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip 'regs->flags' and 'regs->sp'. */
                        "       addq $16, %rsp\n"
                        /* And pop flags register from 'regs->ss'. */
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushl %ss\n"
                        /* Save the 'sp - 4', this will be fixed later. */
                        "       pushl %esp\n"
                        "       pushfl\n"
                        ".global optprobe_template_clac\n"
                        "optprobe_template_clac:\n"
                        ASM_NOP3
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Copy 'regs->flags' into 'regs->ss'. */
                        "       movl 14*4(%esp), %edx\n"
                        "       movl %edx, 16*4(%esp)\n"
                        RESTORE_REGS_STRING
                        /* Skip 'regs->flags' and 'regs->sp'. */
                        "       addl $8, %esp\n"
                        /* And pop flags register from 'regs->ss'. */
                        "       popfl\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n"
                        ".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

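/*
 * Byte offsets of the patch points (the CLAC NOP3, the 'op' address load,
 * the callback call and the template end), measured from
 * optprobe_template_entry. These are used below when filling in the copied
 * template for each probe.
 */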
#define TMPL_CLAC_IDX \
        ((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
        ((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)optprobe_template_end - (long)optprobe_template_entry)

/* Optimized kprobe callback function: called from the optinsn detour buffer */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        /* This is possible if op is under delayed unoptimization */
        if (kprobe_disabled(&op->kp))
                return;

        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
                /* Adjust stack pointer */
                regs->sp += sizeof(long);
                /* Save skipped registers */
                regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
                regs->gs = 0;
#endif
                regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

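/*
 * Copy instructions covering at least JMP32_INSN_SIZE bytes from @src into
 * @dest, relocated as if they were placed at @real. Returns the number of
 * bytes copied, or a negative error if an instruction cannot be boosted or
 * the range is reserved by ftrace/alternatives/jump_label/static_call.
 */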
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
        struct insn insn;
        int len = 0, ret;

        while (len < JMP32_INSN_SIZE) {
                ret = __copy_instruction(dest + len, src + len, real + len, &insn);
                if (!ret || !can_boost(&insn, src + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1) ||
            static_call_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* jmp near/far indirect (FF /4, FF /5) */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
        int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
        /*
         * A jump to an x86_indirect_thunk_* is treated as an indirect jump.
         * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with an
         * older gcc may still use indirect jumps. So we add this check in
         * addition to the indirect-jump check above rather than replacing it.
         */
        if (!ret)
                ret = insn_jump_into_range(insn,
                                (unsigned long)__indirect_thunk_start,
                                (unsigned long)__indirect_thunk_end -
                                (unsigned long)__indirect_thunk_start);
#endif
        return ret;
}

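/* Check that every byte in [addr, eaddr) is an INT3 padding byte. */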
static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
{
        unsigned char ops;

        for (; addr < eaddr; addr++) {
                if (get_kernel_nofault(ops, (void *)addr) < 0 ||
                    ops != INT3_INSN_OPCODE)
                        return false;
        }

        return true;
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Lookup symbol including addr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling and registers setup.
         */
        if (((paddr >= (unsigned long)__entry_text_start) &&
             (paddr <  (unsigned long)__entry_text_end)))
                return 0;

        /* Check there is enough space for a relative jump. */
        if (size - offset < JMP32_INSN_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                int ret;

                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code will jump into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;

                ret = insn_decode_kernel(&insn, (void *)recovered_insn);
                if (ret < 0)
                        return 0;

                /*
                 * If we detect an unknown breakpoint here, it could be
                 * padding INT3s between functions. Check that all the
                 * rest of the bytes are also INT3.
                 */
                if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
                        return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;

                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Check that the instruction doesn't jump into the target */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
                                         DISP32_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disabled(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether the addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                 kprobe_opcode_t *addr)
{
        return (op->kp.addr <= addr &&
                op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        u8 *slot = op->optinsn.insn;
        if (slot) {
                int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

                /* Record the perf event before freeing the slot */
                if (dirty)
                        perf_event_text_poke(slot, slot, len, NULL, 0);

                free_optinsn_slot(slot, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy replacing target instructions.
 * Target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
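/*
 * Detour buffer layout (a minimal sketch of what is assembled below):
 *
 *   [optprobe template (TMPL_END_IDX bytes)][copied instructions][jmp back]
 *
 * The template saves registers, loads 'op' into the first argument register,
 * and calls optimized_callback(); the copied instructions and the trailing
 * jump let execution resume right after the probed region.
 */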
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                                  struct kprobe *__unused)
{
        u8 *buf = NULL, *slot;
        int ret, len;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        op->optinsn.insn = slot = get_optinsn_slot();
        if (!slot) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * Verify that the address gap is within the 2GB range, because this
         * uses a relative jump.
         */
        rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
        if (abs(rel) > 0x7fffffff) {
                ret = -ERANGE;
                goto err;
        }

        /* Copy arch-dep-instance from template */
        memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
                                          slot + TMPL_END_IDX);
        if (ret < 0)
                goto err;
        op->optinsn.size = ret;
        len = TMPL_END_IDX + op->optinsn.size;

        synthesize_clac(buf + TMPL_CLAC_IDX);

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX,
                           slot + TMPL_CALL_IDX, optimized_callback);

        /* Set the returning jmp instruction at the tail of the out-of-line buffer */
        synthesize_reljump(buf + len, slot + len,
                           (u8 *)op->kp.addr + op->optinsn.size);
        len += JMP32_INSN_SIZE;

        /*
         * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
         * used in __arch_remove_optimized_kprobe().
         */

        /* We have to use text_poke() for the instruction buffer because it is RO */
        perf_event_text_poke(slot, NULL, 0, buf, len);
        text_poke(slot, buf, len);

        ret = 0;
out:
        kfree(buf);
        return ret;

err:
        __arch_remove_optimized_kprobe(op, 0);
        goto out;
}

/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * The caller must hold kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        u8 insn_buff[JMP32_INSN_SIZE];

        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + JMP32_INSN_SIZE));

                WARN_ON(kprobe_disabled(&op->kp));

                /* Backup instructions which will be replaced by jump address */
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
                       DISP32_SIZE);

                insn_buff[0] = JMP32_INSN_OPCODE;
                *(s32 *)(&insn_buff[1]) = rel;

                text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

                list_del_init(&op->list);
        }
}

/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
        u8 old[JMP32_INSN_SIZE];
        u8 *addr = op->kp.addr;

        memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
        memcpy(new + INT3_INSN_SIZE,
               op->optinsn.copied_insn,
               JMP32_INSN_SIZE - INT3_INSN_SIZE);

        text_poke(addr, new, INT3_INSN_SIZE);
        text_poke_sync();
        text_poke(addr + INT3_INSN_SIZE,
                  new + INT3_INSN_SIZE,
                  JMP32_INSN_SIZE - INT3_INSN_SIZE);
        text_poke_sync();

        perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}

/*
 * Recover the original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
                             struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                arch_unoptimize_kprobe(op);
                list_move(&op->list, done_list);
        }
}

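/*
 * Called when the probe's INT3 is hit: if the kprobe has been optimized,
 * divert execution into its out-of-line detour buffer (right after the
 * template) instead of single-stepping the original instruction. Returns 1
 * when the detour is taken.
 */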
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe can really run the optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through the copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                return 1;
        }
        return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);