/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
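/*
 * Illustrative note (not in the original source): MAXACTIVE is given on
 * the command line as part of the 'r' prefix, e.g.
 *
 *	echo 'r100:myretprobe do_sys_open $retval' >> kprobe_events
 *
 * requests 100 kretprobe instances. create_trace_kprobe() below rejects
 * values above KRETPROBE_MAXACTIVE_MAX because the instances are kept
 * on a list that must stay reasonably short.
 */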
/**
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
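/*
 * Illustrative note (not in the original source): tp.args[] is a
 * flexible array at the tail of the embedded trace_probe, so a probe
 * with two arguments would be allocated as
 *
 *	tk = kzalloc(SIZEOF_TRACE_KPROBE(2), GFP_KERNEL);
 *
 * i.e. offsetof(struct trace_kprobe, tp.args) plus room for two
 * struct probe_arg entries.
 */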
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
							struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);

	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
	return !!strchr(trace_kprobe_symbol(tk), ':');
}

static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;

	update_symbol_cache(sc);

	return sc;
}
/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset)); \
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL
#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,	\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))			\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get the string again, since the string can be changed
	 * while probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
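/*
 * Illustrative note (an assumption about the helpers in trace_probe.h,
 * not part of the original source): the u32 "data_rloc" word is packed
 * roughly as
 *
 *	data_rloc = ((u32)len << 16) | (offs & 0xffff);
 *
 * with the string length in the upper 16 bits and the data offset,
 * relative to this word, in the lower 16 bits; get_rloc_len() and
 * get_rloc_offs() unpack it.
 */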
/* Return the length of the string -- including the null terminating byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
									\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL
/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};
/*
 * Allocate a new trace_kprobe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					       const char *event,
					       void *addr,
					       const char *symbol,
					       unsigned long offs,
					       int maxactive,
					       int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	tk->rp.maxactive = maxactive;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}
static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}
static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}
/*
 * Enable trace_probe.
 * If file is NULL, enable the "perf" handler; otherwise enable the
 * "trace" handler.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int ret = 0;

	if (file) {
		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
	} else
		tk->tp.flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}

	if (ret) {
		if (file) {
			/* Only unlink if the WARN_ON_ONCE() did not fire */
			if (!WARN_ON_ONCE(!link))
				list_del_rcu(&link->list);
			kfree(link);
			tk->tp.flags &= ~TP_FLAG_TRACE;
		} else {
			tk->tp.flags &= ~TP_FLAG_PROFILE;
		}
	}
 out:
	return ret;
}
/*
 * Disable trace_probe.
 * If file is NULL, disable the "perf" handler; otherwise disable the
 * "trace" handler.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure the probe is really disabled (all running
		 * handlers have finished). This is not only for kfree();
		 * the caller, trace_remove_event_call(), also relies on it
		 * for releasing event_call related objects, which are
		 * accessed in kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}
/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear the disabled flag according to tp->flags */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0)
		tk->tp.flags |= TP_FLAG_REGISTERED;
	else {
		pr_warn("Could not insert probe at %s+%lu: %d\n",
			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
				tk->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}
/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse: clear addr so the symbol is looked up again */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}
/* Unregister a trace_kprobe and probe_event: call with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* An enabled event cannot be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}
/* Register a trace_kprobe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete the old event if one with the same name exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
				   tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}
/* Module notifier callback, checking events on the coming module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
					unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on the coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};
/* Convert certain expected symbols into '_' when generating event names */
static inline void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}
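/*
 * For example (illustrative, not in the original source): an
 * auto-generated name built from a symbol such as
 * "p_cryptomgr_test.isra.4_0" becomes "p_cryptomgr_test_isra_4_0",
 * since ':' and '.' would fail the is_good_name() check in
 * alloc_trace_kprobe().
 */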
static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe:
	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe:
	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm	: fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
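	/*
	 * Illustrative examples (following Documentation/trace/kprobetrace.txt;
	 * register names here are x86-specific):
	 *
	 *   echo 'p:myprobe do_sys_open dfd=%ax filename=%dx' \
	 *		>> /sys/kernel/debug/tracing/kprobe_events
	 *   echo 'r:myretprobe do_sys_open $retval' \
	 *		>> /sys/kernel/debug/tracing/kprobe_events
	 *   echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events
	 */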
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	int maxactive = 0;
	char *arg;
	long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	event = strchr(&argv[0][1], ':');
	if (event) {
		event[0] = '\0';
		event++;
	}
	if (is_return && isdigit(argv[0][1])) {
		ret = kstrtouint(&argv[0][1], 0, &maxactive);
		if (ret) {
			pr_info("Failed to parse maxactive.\n");
			return ret;
		}
		/* kretprobes instances are iterated over via a list. The
		 * maximum should stay reasonable.
		 */
		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
			pr_info("Maxactive is too big (%d > %d).\n",
				maxactive, KRETPROBE_MAXACTIVE_MAX);
			return -E2BIG;
		}
	}
	if (event) {
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}
	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}

	/* try to parse an address. if that fails, try to read the
	 * input as a symbol. */
	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret || offset < 0 || offset > UINT_MAX) {
			pr_info("Failed to parse either an address or a symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !kprobe_on_func_entry(NULL, symbol, offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		sanitize_event_name(buf);
		event = buf;
	}

	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
				argc, is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}
	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
						   tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						 is_return, true,
						 kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}
static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}
/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	if (trace_kprobe_is_return(tk) && tk->rp.maxactive)
		seq_printf(m, "%d", tk->rp.maxactive);
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
		   trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}
static const struct seq_operations probes_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
				       create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner		= THIS_MODULE,
	.open		= probes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= probes_write,
};
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner		= THIS_MODULE,
	.open		= profile_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);
/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);
/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}
static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}
#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static void
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct bpf_prog *prog = call->prog;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (prog && !trace_call_bpf(prog, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kprobe_perf_func);
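/*
 * Illustrative note (not in the original source): the size calculation
 * above keeps the record u64-aligned once perf prepends its u32 size
 * header. For example, if sizeof(*entry) + tk->tp.size + dsize == 30:
 *
 *	size = ALIGN(30 + 4, 8) - 4 = 40 - 4 = 36
 *
 * so header (4) + payload (36) = 40 bytes, a multiple of sizeof(u64).
 */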
/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct bpf_prog *prog = call->prog;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (prog && !trace_call_bpf(prog, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif	/* CONFIG_PERF_EVENTS */
/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tk, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};
static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret;

	/* Initialize trace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = (void *)tk;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);

	return ret;
}
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);
#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table. 'noinline' makes sure that there
 * isn't an inlined version used by the test method below.
 */
static __used __init noinline int
kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}
static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
	struct trace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tk->tp.call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, so we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				 "$stack $stack0 +0($stack)",
				 create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				 "$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;
	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing them */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = traceprobe_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	/*
	 * Wait for the optimizer work to finish. Otherwise it might fiddle
	 * with probes in already freed __init text.
	 */
	wait_for_kprobe_optimizer();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif