// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers. Derived from
 * "arch/x86/kernel/hw_breakpoint.c"
 *
 * Copyright 2010 IBM Corporation
 * Author: K.Prasad <prasad@linux.vnet.ibm.com>
 */

#include <linux/hw_breakpoint.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/init.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
#include <asm/hvcall.h>
#include <asm/inst.h>
#include <linux/uaccess.h>

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for every cpu
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);

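/*
 * Added commentary (not in the original source): HBP_NUM_MAX sizes the
 * per-cpu array for the largest number of watchpoint registers any
 * supported CPU provides, while nr_wp_slots() returns how many are
 * actually usable at runtime (e.g. a second slot only where a second
 * DAWR exists). Only the first nr_wp_slots() entries are ever touched.
 */
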
/*
 * Returns total number of data or instruction breakpoints available.
 */
int hw_breakpoint_slots(int type)
{
	if (type == TYPE_DATA)
		return nr_wp_slots();
	return 0;		/* no instruction breakpoints available */
}

static bool single_step_pending(void)
{
	int i;

	for (i = 0; i < nr_wp_slots(); i++) {
		if (current->thread.last_hit_ubp[i])
			return true;
	}
	return false;
}

/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
	struct perf_event **slot;
	int i;

	for (i = 0; i < nr_wp_slots(); i++) {
		slot = this_cpu_ptr(&bp_per_reg[i]);
		if (!*slot) {
			*slot = bp;
			break;
		}
	}

	if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
		return -EBUSY;

	/*
	 * Do not install DABR values if the instruction must be single-stepped.
	 * If so, DABR will be populated in single_step_dabr_instruction().
	 */
	if (!single_step_pending())
		__set_breakpoint(i, info);

	return 0;
}

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint null_brk = {0};
	struct perf_event **slot;
	int i;

	for (i = 0; i < nr_wp_slots(); i++) {
		slot = this_cpu_ptr(&bp_per_reg[i]);
		if (*slot == bp) {
			*slot = NULL;
			break;
		}
	}

	if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
		return;

	__set_breakpoint(i, &null_brk);
}

static bool is_ptrace_bp(struct perf_event *bp)
{
	return bp->overflow_handler == ptrace_triggered;
}

struct breakpoint {
	struct list_head list;
	struct perf_event *bp;
	bool ptrace_bp;
};

/*
 * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
 * rely on it safely synchronizing internals here; however, we can rely on it
 * not requesting more breakpoints than available.
 */
static DEFINE_SPINLOCK(cpu_bps_lock);
static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
static DEFINE_SPINLOCK(task_bps_lock);
static LIST_HEAD(task_bps);

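/*
 * Added commentary on the bookkeeping above: task_bps is a single global
 * list holding ptrace and task-bound perf breakpoints, serialized by
 * task_bps_lock; cpu_bps is a per-cpu slot array for cpu-bound perf
 * breakpoints, serialized by cpu_bps_lock. Both exist only to enforce
 * the ptrace/perf coexistence rules checked below, not to allocate
 * actual debug registers.
 */
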
static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
{
	struct breakpoint *tmp;

	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		return ERR_PTR(-ENOMEM);
	tmp->bp = bp;
	tmp->ptrace_bp = is_ptrace_bp(bp);
	return tmp;
}

static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
{
	__u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;

	bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
	bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
	bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
	bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);

	return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
}

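/*
 * Worked example (illustrative values, HW_BREAKPOINT_SIZE == 8):
 *   bp1: bp_addr = 0x1003, bp_len = 2 -> rounds to [0x1000, 0x1008)
 *   bp2: bp_addr = 0x1008, bp_len = 1 -> rounds to [0x1008, 0x1010)
 * 0x1000 < 0x1010 holds, but 0x1008 > 0x1008 does not, so no overlap.
 * Had bp2 used bp_addr = 0x1007, it would round to [0x1000, 0x1008)
 * and overlap bp1.
 */
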
static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
{
	return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
}

static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
{
	return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
}

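/*
 * Coexistence policy in table form (added summary):
 *
 *   existing \ new   ptrace bp                  perf bp
 *   ptrace bp        always ok                  ok iff ranges disjoint
 *   perf bp          ok iff ranges disjoint     always ok
 *
 * i.e. breakpoints from the same infrastructure always coexist; mixed
 * ones must not overlap after doubleword rounding.
 */
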
static int task_bps_add(struct perf_event *bp)
{
	struct breakpoint *tmp;

	tmp = alloc_breakpoint(bp);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	spin_lock(&task_bps_lock);
	list_add(&tmp->list, &task_bps);
	spin_unlock(&task_bps_lock);
	return 0;
}

static void task_bps_remove(struct perf_event *bp)
{
	struct list_head *pos, *q;

	spin_lock(&task_bps_lock);
	list_for_each_safe(pos, q, &task_bps) {
		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);

		if (tmp->bp == bp) {
			list_del(&tmp->list);
			kfree(tmp);
			break;
		}
	}
	spin_unlock(&task_bps_lock);
}

/*
 * If any task has a breakpoint from the alternate infrastructure,
 * return true. Otherwise return false.
 */
static bool all_task_bps_check(struct perf_event *bp)
{
	struct breakpoint *tmp;
	bool ret = false;

	spin_lock(&task_bps_lock);
	list_for_each_entry(tmp, &task_bps, list) {
		if (!can_co_exist(tmp, bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&task_bps_lock);
	return ret;
}

/*
 * If the same task has a breakpoint from the alternate infrastructure,
 * return true. Otherwise return false.
 */
static bool same_task_bps_check(struct perf_event *bp)
{
	struct breakpoint *tmp;
	bool ret = false;

	spin_lock(&task_bps_lock);
	list_for_each_entry(tmp, &task_bps, list) {
		if (tmp->bp->hw.target == bp->hw.target &&
		    !can_co_exist(tmp, bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&task_bps_lock);
	return ret;
}

static int cpu_bps_add(struct perf_event *bp)
{
	struct breakpoint **cpu_bp;
	struct breakpoint *tmp;
	int i = 0;

	tmp = alloc_breakpoint(bp);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!cpu_bp[i]) {
			cpu_bp[i] = tmp;
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
	return 0;
}

static void cpu_bps_remove(struct perf_event *bp)
{
	struct breakpoint **cpu_bp;
	int i = 0;

	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!cpu_bp[i])
			continue;

		if (cpu_bp[i]->bp == bp) {
			kfree(cpu_bp[i]);
			cpu_bp[i] = NULL;
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
}

static bool cpu_bps_check(int cpu, struct perf_event *bp)
{
	struct breakpoint **cpu_bp;
	bool ret = false;
	int i;

	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
	return ret;
}

static bool all_cpu_bps_check(struct perf_event *bp)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (cpu_bps_check(cpu, bp))
			return true;
	}
	return false;
}

int arch_reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	/* ptrace breakpoint */
	if (is_ptrace_bp(bp)) {
		if (all_cpu_bps_check(bp))
			return -ENOSPC;

		if (same_task_bps_check(bp))
			return -ENOSPC;

		return task_bps_add(bp);
	}

	/* perf breakpoint */
	if (is_kernel_addr(bp->attr.bp_addr))
		return 0;

	if (bp->hw.target && bp->cpu == -1) {
		if (same_task_bps_check(bp))
			return -ENOSPC;

		return task_bps_add(bp);
	} else if (!bp->hw.target && bp->cpu != -1) {
		if (all_task_bps_check(bp))
			return -ENOSPC;

		return cpu_bps_add(bp);
	}

	if (same_task_bps_check(bp))
		return -ENOSPC;

	ret = cpu_bps_add(bp);
	if (ret)
		return ret;
	ret = task_bps_add(bp);
	if (ret)
		cpu_bps_remove(bp);

	return ret;
}

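/*
 * Added summary of the reservation policy above:
 *   - ptrace bp: refused if it overlaps a perf bp on any cpu or on the
 *     same task; otherwise tracked on task_bps.
 *   - perf bp on a kernel address: always accepted, never tracked.
 *   - perf bp bound to a task only (cpu == -1): checked against that
 *     task's ptrace bps, tracked on task_bps.
 *   - perf bp bound to a cpu only: checked against all ptrace bps,
 *     tracked on cpu_bps.
 *   - perf bp bound to both: tracked on both lists, with the cpu_bps
 *     entry undone if the task_bps insertion fails.
 */
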
void arch_release_bp_slot(struct perf_event *bp)
{
	if (!is_kernel_addr(bp->attr.bp_addr)) {
		if (bp->hw.target)
			task_bps_remove(bp);
		if (bp->cpu != -1)
			cpu_bps_remove(bp);
	}
}

/*
 * Perform cleanup of arch-specific counters during unregistration
 * of the perf-event
 */
void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
	/*
	 * If the breakpoint is unregistered between a hw_breakpoint_handler()
	 * and the single_step_dabr_instruction(), then clean up the breakpoint
	 * restoration variables to prevent dangling pointers.
	 * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
	 */
	if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) {
		int i;

		for (i = 0; i < nr_wp_slots(); i++) {
			if (bp->ctx->task->thread.last_hit_ubp[i] == bp)
				bp->ctx->task->thread.last_hit_ubp[i] = NULL;
		}
	}
}

/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
	return is_kernel_addr(hw->address);
}

int arch_bp_generic_fields(int type, int *gen_bp_type)
{
	*gen_bp_type = 0;
	if (type & HW_BRK_TYPE_READ)
		*gen_bp_type |= HW_BREAKPOINT_R;
	if (type & HW_BRK_TYPE_WRITE)
		*gen_bp_type |= HW_BREAKPOINT_W;
	if (*gen_bp_type == 0)
		return -EINVAL;
	return 0;
}

/*
 * Watchpoint match range is always doubleword (8 bytes) aligned on
 * powerpc. If the given range crosses a doubleword boundary, we need
 * to increase the length so that the next doubleword is also covered.
 * ie,
 *
 *          address    len = 6 bytes
 *                |=========.
 *   |------------v--|------v--------|
 *   | | | | | | | | | | | | | | | | |
 *   |---------------|---------------|
 *    <---8 bytes--->
 *
 * In this case, we should configure hw as:
 *   start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
 *   len = 16 bytes
 *
 * @start_addr is inclusive but @end_addr is exclusive.
 */
static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
{
	u16 max_len = DABR_MAX_LEN;
	u16 hw_len;
	unsigned long start_addr, end_addr;

	start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
	end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
	hw_len = end_addr - start_addr;

	if (dawr_enabled()) {
		max_len = DAWR_MAX_LEN;
		/* DAWR region can't cross a 512-byte boundary on P10 predecessors */
		if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
		    (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
			return -EINVAL;
	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
		/* 8xx can set up a range without limitation */
		max_len = U16_MAX;
	}

	if (hw_len > max_len)
		return -EINVAL;

	hw->hw_len = hw_len;
	return 0;
}

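/*
 * Worked example (illustrative values; DABR_MAX_LEN == 8 and
 * DAWR_MAX_LEN == 512 as defined in asm/hw_breakpoint.h):
 *   address = 0x1003, len = 6:
 *     start_addr = 0x1000, end_addr = 0x1010, hw_len = 16
 *   -> rejected with a plain DABR (16 > 8), accepted with a DAWR.
 * Pre-P10 DAWR boundary check: start_addr = 0x1f8, end_addr = 0x208
 * fall in different 512B blocks (0x000 vs 0x200), so -EINVAL.
 */
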
/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	int ret = -EINVAL;

	if (!bp || !attr->bp_len)
		return ret;

	hw->type = HW_BRK_TYPE_TRANSLATE;
	if (attr->bp_type & HW_BREAKPOINT_R)
		hw->type |= HW_BRK_TYPE_READ;
	if (attr->bp_type & HW_BREAKPOINT_W)
		hw->type |= HW_BRK_TYPE_WRITE;
	if (hw->type == HW_BRK_TYPE_TRANSLATE)
		/* must set at least read or write */
		return ret;
	if (!attr->exclude_user)
		hw->type |= HW_BRK_TYPE_USER;
	if (!attr->exclude_kernel)
		hw->type |= HW_BRK_TYPE_KERNEL;
	if (!attr->exclude_hv)
		hw->type |= HW_BRK_TYPE_HYP;
	hw->address = attr->bp_addr;
	hw->len = attr->bp_len;

	if (!ppc_breakpoint_available())
		return -ENODEV;

	return hw_breakpoint_validate_len(hw);
}

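/*
 * Example attr -> hw translation (illustrative): a user-space write
 * watchpoint with attr.bp_type = HW_BREAKPOINT_W, attr.bp_len = 8,
 * attr.exclude_user = 0 and attr.exclude_kernel = attr.exclude_hv = 1
 * yields hw->type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_WRITE |
 * HW_BRK_TYPE_USER, with hw->address and hw->len copied verbatim and
 * then rounded by hw_breakpoint_validate_len().
 */
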
/*
 * Restores the breakpoint on the debug registers.
 * Invoke this function if it is known that the execution context is
 * about to change to cause loss of MSR_SE settings.
 */
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
	struct arch_hw_breakpoint *info;
	int i;

	for (i = 0; i < nr_wp_slots(); i++) {
		if (unlikely(tsk->thread.last_hit_ubp[i]))
			goto reset;
	}
	return;

reset:
	regs_set_return_msr(regs, regs->msr & ~MSR_SE);
	for (i = 0; i < nr_wp_slots(); i++) {
		info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
		__set_breakpoint(i, info);
		tsk->thread.last_hit_ubp[i] = NULL;
	}
}

static bool is_larx_stcx_instr(int type)
{
	return type == LARX || type == STCX;
}

static bool is_octword_vsx_instr(int type, int size)
{
	return ((type == LOAD_VSX || type == STORE_VSX) && size == 32);
}

/*
 * We've failed to reliably handle the hw-breakpoint. Unregister
 * it and throw a warning message to let the user know about it.
 */
static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info)
{
	WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
	     info->address);
	perf_event_disable_inatomic(bp);
}

static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info)
{
	printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
			   info->address);
	perf_event_disable_inatomic(bp);
}

static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
			     struct arch_hw_breakpoint **info, int *hit,
			     ppc_inst_t instr)
{
	int i;
	int stepped;

	/* Do not emulate user-space instructions, instead single-step them */
	if (user_mode(regs)) {
		for (i = 0; i < nr_wp_slots(); i++) {
			if (!hit[i])
				continue;
			current->thread.last_hit_ubp[i] = bp[i];
			info[i] = NULL;
		}
		regs_set_return_msr(regs, regs->msr | MSR_SE);
		return false;
	}

	stepped = emulate_step(regs, instr);
	if (!stepped) {
		for (i = 0; i < nr_wp_slots(); i++) {
			if (!hit[i])
				continue;
			handler_error(bp[i], info[i]);
			info[i] = NULL;
		}
		return false;
	}
	return true;
}

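/*
 * Added note: kernel-mode hits are emulated with emulate_step() so the
 * access completes while the DAWR/DABR is disarmed; user-mode hits are
 * instead single-stepped via MSR_SE, and single_step_dabr_instruction()
 * re-arms the registers and delivers the perf callback afterwards.
 */
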
static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
					     int *hit, unsigned long ea)
{
	int i;
	unsigned long hw_end_addr;

	/*
	 * Handle spurious exception only when any bp_per_reg is set.
	 * Otherwise this might be created by xmon and not actually a
	 * spurious exception.
	 */
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!info[i])
			continue;

		hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE);

		/*
		 * Ending address of DAWR range must be less than starting
		 * address of op.
		 */
		if ((hw_end_addr - 1) >= ea)
			continue;

		/*
		 * Those addresses need to be in the same or in two
		 * consecutive 512B blocks.
		 */
		if (((hw_end_addr - 1) >> 10) != (ea >> 10))
			continue;

		/*
		 * 'op address + 64B' generates an address that has a
		 * carry into bit 52 (crosses 2K boundary).
		 */
		if ((ea & 0x800) == ((ea + 64) & 0x800))
			continue;

		break;
	}

	if (i == nr_wp_slots())
		return;

	for (i = 0; i < nr_wp_slots(); i++) {
		if (info[i]) {
			info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
			hit[i] = 1;
		}
	}
}

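/*
 * Worked example of the checks above (illustrative values): with a DAWR
 * range ending at hw_end_addr = 0x17c8 and ea = 0x17d0:
 *   - 0x17c7 < 0x17d0: the range ends before the access starts;
 *   - 0x17c7 >> 10 == 0x17d0 >> 10: both sit in the same 1KB region;
 *   - (0x17d0 & 0x800) != (0x1810 & 0x800): ea + 64 crosses the 2KB
 *     boundary at 0x1800;
 * so the exception is flagged as a spurious P10 DD1 match.
 */
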
/*
 * Handle a DABR or DAWR exception.
 *
 * Called in atomic context.
 */
int hw_breakpoint_handler(struct die_args *args)
{
	bool err = false;
	int rc = NOTIFY_STOP;
	struct perf_event *bp[HBP_NUM_MAX] = { NULL };
	struct pt_regs *regs = args->regs;
	struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL };
	int i;
	int hit[HBP_NUM_MAX] = {0};
	int nr_hit = 0;
	bool ptrace_bp = false;
	ppc_inst_t instr = ppc_inst(0);
	int type = 0;
	int size = 0;
	unsigned long ea = 0;

	/* Disable breakpoints during exception handling */
	hw_breakpoint_disable();

	/*
	 * The counter may be concurrently released but that can only
	 * occur from a call_rcu() path. We can then safely fetch
	 * the breakpoint, use its callback, touch its counter
	 * while we are in an rcu_read_lock() path.
	 */
	rcu_read_lock();

	if (!IS_ENABLED(CONFIG_PPC_8xx))
		wp_get_instr_detail(regs, &instr, &type, &size, &ea);

	for (i = 0; i < nr_wp_slots(); i++) {
		bp[i] = __this_cpu_read(bp_per_reg[i]);
		if (!bp[i])
			continue;

		info[i] = counter_arch_bp(bp[i]);
		info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;

		if (wp_check_constraints(regs, instr, ea, type, size, info[i])) {
			if (!IS_ENABLED(CONFIG_PPC_8xx) &&
			    ppc_inst_equal(instr, ppc_inst(0))) {
				handler_error(bp[i], info[i]);
				info[i] = NULL;
				err = true;
				continue;
			}

			if (is_ptrace_bp(bp[i]))
				ptrace_bp = true;
			hit[i] = 1;
			nr_hit++;
		}
	}

	if (err)
		goto reset;

	if (!nr_hit) {
		/* Workaround for Power10 DD1 */
		if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
		    is_octword_vsx_instr(type, size)) {
			handle_p10dd1_spurious_exception(info, hit, ea);
		} else {
			rc = NOTIFY_DONE;
			goto out;
		}
	}

	/*
	 * Return early after invoking user-callback function without restoring
	 * DABR if the breakpoint is from ptrace which always operates in
	 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
	 * generated in do_dabr().
	 */
	if (ptrace_bp) {
		for (i = 0; i < nr_wp_slots(); i++) {
			if (!hit[i])
				continue;
			perf_bp_event(bp[i], regs);
			info[i] = NULL;
		}
		rc = NOTIFY_DONE;
		goto reset;
	}

	if (!IS_ENABLED(CONFIG_PPC_8xx)) {
		if (is_larx_stcx_instr(type)) {
			for (i = 0; i < nr_wp_slots(); i++) {
				if (!hit[i])
					continue;
				larx_stcx_err(bp[i], info[i]);
				info[i] = NULL;
			}
			goto reset;
		}

		if (!stepping_handler(regs, bp, info, hit, instr))
			goto reset;
	}

	/*
	 * As a policy, the callback is invoked in a 'trigger-after-execute'
	 * fashion
	 */
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!hit[i])
			continue;
		if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
			perf_bp_event(bp[i], regs);
	}

reset:
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!info[i])
			continue;
		__set_breakpoint(i, info[i]);
	}

out:
	rcu_read_unlock();
	return rc;
}
NOKPROBE_SYMBOL(hw_breakpoint_handler);

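/*
 * Added flow summary: disarm all slots -> decode the faulting access ->
 * match it against each armed slot -> bail out on decode errors or, on
 * P10 DD1, flag spurious octword VSX matches -> ptrace hits fire their
 * one-shot callback and return -> larx/stcx hits disable the breakpoint
 * (they can't be emulated safely) -> everything else is emulated or
 * single-stepped, then callbacks fire trigger-after-execute and the
 * slots are re-armed.
 */
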
/*
 * Handle single-step exceptions following a DABR hit.
 *
 * Called in atomic context.
 */
static int single_step_dabr_instruction(struct die_args *args)
{
	struct pt_regs *regs = args->regs;
	struct perf_event *bp = NULL;
	struct arch_hw_breakpoint *info;
	int i;
	bool found = false;

	/*
	 * Check if we are single-stepping as a result of a
	 * previous HW Breakpoint exception
	 */
	for (i = 0; i < nr_wp_slots(); i++) {
		bp = current->thread.last_hit_ubp[i];

		if (!bp)
			continue;

		found = true;
		info = counter_arch_bp(bp);

		/*
		 * We shall invoke the user-defined callback function in the
		 * single stepping handler to conform to 'trigger-after-execute'
		 * semantics
		 */
		if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
			perf_bp_event(bp, regs);
		current->thread.last_hit_ubp[i] = NULL;
	}

	if (!found)
		return NOTIFY_DONE;

	for (i = 0; i < nr_wp_slots(); i++) {
		bp = __this_cpu_read(bp_per_reg[i]);
		if (!bp)
			continue;

		info = counter_arch_bp(bp);
		__set_breakpoint(i, info);
	}

	/*
	 * If the process was being single-stepped by ptrace, let the
	 * other single-step actions occur (e.g. generate SIGTRAP).
	 */
	if (test_thread_flag(TIF_SINGLESTEP))
		return NOTIFY_DONE;

	return NOTIFY_STOP;
}
NOKPROBE_SYMBOL(single_step_dabr_instruction);

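/*
 * Added note: this handler is the second half of the user-space path
 * started in stepping_handler(). The perf callback is deliberately
 * deferred to here so that 'trigger-after-execute' semantics also hold
 * for single-stepped user instructions, and bp_per_reg is re-armed only
 * once the stepped instruction has retired.
 */
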
/*
 * Handle debug exception notifications.
 *
 * Called in atomic context.
 */
int hw_breakpoint_exceptions_notify(
		struct notifier_block *unused, unsigned long val, void *data)
{
	int ret = NOTIFY_DONE;

	switch (val) {
	case DIE_DABR_MATCH:
		ret = hw_breakpoint_handler(data);
		break;
	case DIE_SSTEP:
		ret = single_step_dabr_instruction(data);
		break;
	}

	return ret;
}
NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
	int i;
	struct thread_struct *t = &tsk->thread;

	for (i = 0; i < nr_wp_slots(); i++) {
		unregister_hw_breakpoint(t->ptrace_bps[i]);
		t->ptrace_bps[i] = NULL;
	}
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}

void ptrace_triggered(struct perf_event *bp,
		      struct perf_sample_data *data, struct pt_regs *regs)
{
	struct perf_event_attr attr;

	/*
	 * Disable the breakpoint request here since ptrace has defined a
	 * one-shot behaviour for breakpoint exceptions in PPC64.
	 * The SIGTRAP signal is generated automatically for us in do_dabr().
	 * We don't have to do anything about that here.
	 */
	attr = bp->attr;
	attr.disabled = true;
	modify_user_hw_breakpoint(bp, &attr);
}