085f056e66f193b46e8e3ac7c9214fbff6030e58
[releases.git] / trace_eprobe.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * event probes
4  *
5  * Part of this code was copied from kernel/trace/trace_kprobe.c written by
6  * Masami Hiramatsu <mhiramat@kernel.org>
7  *
8  * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
9  * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
10  *
11  */
12 #include <linux/module.h>
13 #include <linux/mutex.h>
14 #include <linux/ftrace.h>
15
16 #include "trace_dynevent.h"
17 #include "trace_probe.h"
18 #include "trace_probe_tmpl.h"
19 #include "trace_probe_kernel.h"
20
21 #define EPROBE_EVENT_SYSTEM "eprobes"
22
23 struct trace_eprobe {
24         /* tracepoint system */
25         const char *event_system;
26
27         /* tracepoint event */
28         const char *event_name;
29
30         struct trace_event_call *event;
31
32         struct dyn_event        devent;
33         struct trace_probe      tp;
34 };
35
/* Private data hung off the trigger that backs an enabled eprobe */
struct eprobe_data {
        /* Event file of the eprobe itself; records are reserved against it */
        struct trace_event_file *file;

        /* The eprobe this trigger belongs to */
        struct trace_eprobe     *ep;
};
40
41 static int __trace_eprobe_create(int argc, const char *argv[]);
42
43 static void trace_event_probe_cleanup(struct trace_eprobe *ep)
44 {
45         if (!ep)
46                 return;
47         trace_probe_cleanup(&ep->tp);
48         kfree(ep->event_name);
49         kfree(ep->event_system);
50         if (ep->event)
51                 trace_event_put_ref(ep->event);
52         kfree(ep);
53 }
54
55 static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
56 {
57         return container_of(ev, struct trace_eprobe, devent);
58 }
59
60 static int eprobe_dyn_event_create(const char *raw_command)
61 {
62         return trace_probe_create(raw_command, __trace_eprobe_create);
63 }
64
65 static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
66 {
67         struct trace_eprobe *ep = to_trace_eprobe(ev);
68         int i;
69
70         seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
71                                 trace_probe_name(&ep->tp));
72         seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
73
74         for (i = 0; i < ep->tp.nr_args; i++)
75                 seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
76         seq_putc(m, '\n');
77
78         return 0;
79 }
80
81 static int unregister_trace_eprobe(struct trace_eprobe *ep)
82 {
83         /* If other probes are on the event, just unregister eprobe */
84         if (trace_probe_has_sibling(&ep->tp))
85                 goto unreg;
86
87         /* Enabled event can not be unregistered */
88         if (trace_probe_is_enabled(&ep->tp))
89                 return -EBUSY;
90
91         /* Will fail if probe is being used by ftrace or perf */
92         if (trace_probe_unregister_event_call(&ep->tp))
93                 return -EBUSY;
94
95 unreg:
96         dyn_event_remove(&ep->devent);
97         trace_probe_unlink(&ep->tp);
98
99         return 0;
100 }
101
/*
 * dyn_event ->free callback: unregister the eprobe and, only if that
 * succeeded, free it. Returns the unregister result.
 */
static int eprobe_dyn_event_release(struct dyn_event *ev)
{
        struct trace_eprobe *ep = to_trace_eprobe(ev);
        int ret;

        ret = unregister_trace_eprobe(ep);
        if (ret)
                return ret;

        trace_event_probe_cleanup(ep);
        return 0;
}
111
112 static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
113 {
114         struct trace_eprobe *ep = to_trace_eprobe(ev);
115
116         return trace_probe_is_enabled(&ep->tp);
117 }
118
119 static bool eprobe_dyn_event_match(const char *system, const char *event,
120                         int argc, const char **argv, struct dyn_event *ev)
121 {
122         struct trace_eprobe *ep = to_trace_eprobe(ev);
123         const char *slash;
124
125         /*
126          * We match the following:
127          *  event only                  - match all eprobes with event name
128          *  system and event only       - match all system/event probes
129          *
130          * The below has the above satisfied with more arguments:
131          *
132          *  attached system/event       - If the arg has the system and event
133          *                                the probe is attached to, match
134          *                                probes with the attachment.
135          *
136          *  If any more args are given, then it requires a full match.
137          */
138
139         /*
140          * If system exists, but this probe is not part of that system
141          * do not match.
142          */
143         if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
144                 return false;
145
146         /* Must match the event name */
147         if (strcmp(trace_probe_name(&ep->tp), event) != 0)
148                 return false;
149
150         /* No arguments match all */
151         if (argc < 1)
152                 return true;
153
154         /* First argument is the system/event the probe is attached to */
155
156         slash = strchr(argv[0], '/');
157         if (!slash)
158                 slash = strchr(argv[0], '.');
159         if (!slash)
160                 return false;
161
162         if (strncmp(ep->event_system, argv[0], slash - argv[0]))
163                 return false;
164         if (strcmp(ep->event_name, slash + 1))
165                 return false;
166
167         argc--;
168         argv++;
169
170         /* If there are no other args, then match */
171         if (argc < 1)
172                 return true;
173
174         return trace_probe_match_command_args(&ep->tp, argc, argv);
175 }
176
177 static struct dyn_event_operations eprobe_dyn_event_ops = {
178         .create = eprobe_dyn_event_create,
179         .show = eprobe_dyn_event_show,
180         .is_busy = eprobe_dyn_event_is_busy,
181         .free = eprobe_dyn_event_release,
182         .match = eprobe_dyn_event_match,
183 };
184
185 static struct trace_eprobe *alloc_event_probe(const char *group,
186                                               const char *this_event,
187                                               struct trace_event_call *event,
188                                               int nargs)
189 {
190         struct trace_eprobe *ep;
191         const char *event_name;
192         const char *sys_name;
193         int ret = -ENOMEM;
194
195         if (!event)
196                 return ERR_PTR(-ENODEV);
197
198         sys_name = event->class->system;
199         event_name = trace_event_name(event);
200
201         ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
202         if (!ep) {
203                 trace_event_put_ref(event);
204                 goto error;
205         }
206         ep->event = event;
207         ep->event_name = kstrdup(event_name, GFP_KERNEL);
208         if (!ep->event_name)
209                 goto error;
210         ep->event_system = kstrdup(sys_name, GFP_KERNEL);
211         if (!ep->event_system)
212                 goto error;
213
214         ret = trace_probe_init(&ep->tp, this_event, group, false);
215         if (ret < 0)
216                 goto error;
217
218         dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
219         return ep;
220 error:
221         trace_event_probe_cleanup(ep);
222         return ERR_PTR(ret);
223 }
224
225 static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
226 {
227         struct probe_arg *parg = &ep->tp.args[i];
228         struct ftrace_event_field *field;
229         struct list_head *head;
230         int ret = -ENOENT;
231
232         head = trace_get_fields(ep->event);
233         list_for_each_entry(field, head, link) {
234                 if (!strcmp(parg->code->data, field->name)) {
235                         kfree(parg->code->data);
236                         parg->code->data = field;
237                         return 0;
238                 }
239         }
240
241         /*
242          * Argument not found on event. But allow for comm and COMM
243          * to be used to get the current->comm.
244          */
245         if (strcmp(parg->code->data, "COMM") == 0 ||
246             strcmp(parg->code->data, "comm") == 0) {
247                 parg->code->op = FETCH_OP_COMM;
248                 ret = 0;
249         }
250
251         kfree(parg->code->data);
252         parg->code->data = NULL;
253         return ret;
254 }
255
256 static int eprobe_event_define_fields(struct trace_event_call *event_call)
257 {
258         int ret;
259         struct eprobe_trace_entry_head field;
260         struct trace_probe *tp;
261
262         tp = trace_probe_primary_from_call(event_call);
263         if (WARN_ON_ONCE(!tp))
264                 return -ENOENT;
265
266         DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
267
268         return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
269 }
270
271 static struct trace_event_fields eprobe_fields_array[] = {
272         { .type = TRACE_FUNCTION_TYPE,
273           .define_fields = eprobe_event_define_fields },
274         {}
275 };
276
277 /* Event entry printers */
278 static enum print_line_t
279 print_eprobe_event(struct trace_iterator *iter, int flags,
280                    struct trace_event *event)
281 {
282         struct eprobe_trace_entry_head *field;
283         struct trace_event_call *pevent;
284         struct trace_event *probed_event;
285         struct trace_seq *s = &iter->seq;
286         struct trace_probe *tp;
287
288         field = (struct eprobe_trace_entry_head *)iter->ent;
289         tp = trace_probe_primary_from_call(
290                 container_of(event, struct trace_event_call, event));
291         if (WARN_ON_ONCE(!tp))
292                 goto out;
293
294         trace_seq_printf(s, "%s: (", trace_probe_name(tp));
295
296         probed_event = ftrace_find_event(field->type);
297         if (probed_event) {
298                 pevent = container_of(probed_event, struct trace_event_call, event);
299                 trace_seq_printf(s, "%s.%s", pevent->class->system,
300                                  trace_event_name(pevent));
301         } else {
302                 trace_seq_printf(s, "%u", field->type);
303         }
304
305         trace_seq_putc(s, ')');
306
307         if (print_probe_args(s, tp->args, tp->nr_args,
308                              (u8 *)&field[1], field) < 0)
309                 goto out;
310
311         trace_seq_putc(s, '\n');
312  out:
313         return trace_handle_return(s);
314 }
315
316 static unsigned long get_event_field(struct fetch_insn *code, void *rec)
317 {
318         struct ftrace_event_field *field = code->data;
319         unsigned long val;
320         void *addr;
321
322         addr = rec + field->offset;
323
324         if (is_string_field(field)) {
325                 switch (field->filter_type) {
326                 case FILTER_DYN_STRING:
327                         val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
328                         break;
329                 case FILTER_STATIC_STRING:
330                         val = (unsigned long)addr;
331                         break;
332                 case FILTER_PTR_STRING:
333                         val = (unsigned long)(*(char *)addr);
334                         break;
335                 default:
336                         WARN_ON_ONCE(1);
337                         return 0;
338                 }
339                 return val;
340         }
341
342         switch (field->size) {
343         case 1:
344                 if (field->is_signed)
345                         val = *(char *)addr;
346                 else
347                         val = *(unsigned char *)addr;
348                 break;
349         case 2:
350                 if (field->is_signed)
351                         val = *(short *)addr;
352                 else
353                         val = *(unsigned short *)addr;
354                 break;
355         case 4:
356                 if (field->is_signed)
357                         val = *(int *)addr;
358                 else
359                         val = *(unsigned int *)addr;
360                 break;
361         default:
362                 if (field->is_signed)
363                         val = *(long *)addr;
364                 else
365                         val = *(unsigned long *)addr;
366                 break;
367         }
368         return val;
369 }
370
371 static int get_eprobe_size(struct trace_probe *tp, void *rec)
372 {
373         struct fetch_insn *code;
374         struct probe_arg *arg;
375         int i, len, ret = 0;
376
377         for (i = 0; i < tp->nr_args; i++) {
378                 arg = tp->args + i;
379                 if (arg->dynamic) {
380                         unsigned long val;
381
382                         code = arg->code;
383  retry:
384                         switch (code->op) {
385                         case FETCH_OP_TP_ARG:
386                                 val = get_event_field(code, rec);
387                                 break;
388                         case FETCH_OP_IMM:
389                                 val = code->immediate;
390                                 break;
391                         case FETCH_OP_COMM:
392                                 val = (unsigned long)current->comm;
393                                 break;
394                         case FETCH_OP_DATA:
395                                 val = (unsigned long)code->data;
396                                 break;
397                         case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
398                                 code++;
399                                 goto retry;
400                         default:
401                                 continue;
402                         }
403                         code++;
404                         len = process_fetch_insn_bottom(code, val, NULL, NULL);
405                         if (len > 0)
406                                 ret += len;
407                 }
408         }
409
410         return ret;
411 }
412
413 /* Eprobe specific fetch functions */
414
415 /* Note that we don't verify it, since the code does not come from user space */
416 static int
417 process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
418                    void *base)
419 {
420         unsigned long val;
421
422  retry:
423         switch (code->op) {
424         case FETCH_OP_TP_ARG:
425                 val = get_event_field(code, rec);
426                 break;
427         case FETCH_OP_IMM:
428                 val = code->immediate;
429                 break;
430         case FETCH_OP_COMM:
431                 val = (unsigned long)current->comm;
432                 break;
433         case FETCH_OP_DATA:
434                 val = (unsigned long)code->data;
435                 break;
436         case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
437                 code++;
438                 goto retry;
439         default:
440                 return -EILSEQ;
441         }
442         code++;
443         return process_fetch_insn_bottom(code, val, dest, base);
444 }
445 NOKPROBE_SYMBOL(process_fetch_insn)
446
447 /* Return the length of string -- including null terminal byte */
448 static nokprobe_inline int
449 fetch_store_strlen_user(unsigned long addr)
450 {
451         return kern_fetch_store_strlen_user(addr);
452 }
453
454 /* Return the length of string -- including null terminal byte */
455 static nokprobe_inline int
456 fetch_store_strlen(unsigned long addr)
457 {
458         return kern_fetch_store_strlen(addr);
459 }
460
461 /*
462  * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
463  * with max length and relative data location.
464  */
465 static nokprobe_inline int
466 fetch_store_string_user(unsigned long addr, void *dest, void *base)
467 {
468         return kern_fetch_store_string_user(addr, dest, base);
469 }
470
471 /*
472  * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
473  * length and relative data location.
474  */
475 static nokprobe_inline int
476 fetch_store_string(unsigned long addr, void *dest, void *base)
477 {
478         return kern_fetch_store_string(addr, dest, base);
479 }
480
481 static nokprobe_inline int
482 probe_mem_read_user(void *dest, void *src, size_t size)
483 {
484         const void __user *uaddr =  (__force const void __user *)src;
485
486         return copy_from_user_nofault(dest, uaddr, size);
487 }
488
489 static nokprobe_inline int
490 probe_mem_read(void *dest, void *src, size_t size)
491 {
492 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
493         if ((unsigned long)src < TASK_SIZE)
494                 return probe_mem_read_user(dest, src, size);
495 #endif
496         return copy_from_kernel_nofault(dest, src, size);
497 }
498
499 /* eprobe handler */
500 static inline void
501 __eprobe_trace_func(struct eprobe_data *edata, void *rec)
502 {
503         struct eprobe_trace_entry_head *entry;
504         struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
505         struct trace_event_buffer fbuffer;
506         int dsize;
507
508         if (WARN_ON_ONCE(call != edata->file->event_call))
509                 return;
510
511         if (trace_trigger_soft_disabled(edata->file))
512                 return;
513
514         fbuffer.trace_ctx = tracing_gen_ctx();
515         fbuffer.trace_file = edata->file;
516
517         dsize = get_eprobe_size(&edata->ep->tp, rec);
518         fbuffer.regs = NULL;
519
520         fbuffer.event =
521                 trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
522                                         call->event.type,
523                                         sizeof(*entry) + edata->ep->tp.size + dsize,
524                                         fbuffer.trace_ctx);
525         if (!fbuffer.event)
526                 return;
527
528         entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
529         if (edata->ep->event)
530                 entry->type = edata->ep->event->event.type;
531         else
532                 entry->type = 0;
533         store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
534
535         trace_event_buffer_commit(&fbuffer);
536 }
537
538 /*
539  * The event probe implementation uses event triggers to get access to
540  * the event it is attached to, but is not an actual trigger. The below
541  * functions are just stubs to fulfill what is needed to use the trigger
542  * infrastructure.
543  */
/* Trigger ->init stub: nothing to set up, always succeeds */
static int eprobe_trigger_init(struct event_trigger_ops *ops,
                               struct event_trigger_data *data)
{
        const int ok = 0;

        return ok;
}
549
/* Trigger ->free stub: intentionally does nothing */
static void eprobe_trigger_free(struct event_trigger_ops *ops,
                                struct event_trigger_data *data)
{
        /* Nothing to release here */
}
555
/* Trigger ->print stub: eprobe event triggers are never printed */
static int eprobe_trigger_print(struct seq_file *m,
                                struct event_trigger_ops *ops,
                                struct event_trigger_data *data)
{
        const int ok = 0;

        return ok;
}
563
564 static void eprobe_trigger_func(struct event_trigger_data *data,
565                                 struct trace_buffer *buffer, void *rec,
566                                 struct ring_buffer_event *rbe)
567 {
568         struct eprobe_data *edata = data->private_data;
569
570         if (unlikely(!rec))
571                 return;
572
573         if (unlikely(!rec))
574                 return;
575
576         __eprobe_trace_func(edata, rec);
577 }
578
579 static struct event_trigger_ops eprobe_trigger_ops = {
580         .func                   = eprobe_trigger_func,
581         .print                  = eprobe_trigger_print,
582         .init                   = eprobe_trigger_init,
583         .free                   = eprobe_trigger_free,
584 };
585
/* Command ->func stub: always fails with -1 */
static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
                                   struct trace_event_file *file,
                                   char *glob, char *cmd, char *param)
{
        const int fail = -1;

        return fail;
}
592
/* Command ->reg stub: always fails with -1 */
static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
                                 struct event_trigger_data *data,
                                 struct trace_event_file *file)
{
        const int fail = -1;

        return fail;
}
599
/* Command ->unreg stub: intentionally does nothing */
static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
                                    struct event_trigger_data *data,
                                    struct trace_event_file *file)
{
        /* Nothing to undo here */
}
606
607 static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
608                                                         char *param)
609 {
610         return &eprobe_trigger_ops;
611 }
612
613 static struct event_command event_trigger_cmd = {
614         .name                   = "eprobe",
615         .trigger_type           = ETT_EVENT_EPROBE,
616         .flags                  = EVENT_CMD_FL_NEEDS_REC,
617         .func                   = eprobe_trigger_cmd_func,
618         .reg                    = eprobe_trigger_reg_func,
619         .unreg                  = eprobe_trigger_unreg_func,
620         .unreg_all              = NULL,
621         .get_trigger_ops        = eprobe_trigger_get_ops,
622         .set_filter             = NULL,
623 };
624
625 static struct event_trigger_data *
626 new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
627 {
628         struct event_trigger_data *trigger;
629         struct eprobe_data *edata;
630
631         edata = kzalloc(sizeof(*edata), GFP_KERNEL);
632         trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
633         if (!trigger || !edata) {
634                 kfree(edata);
635                 kfree(trigger);
636                 return ERR_PTR(-ENOMEM);
637         }
638
639         trigger->flags = EVENT_TRIGGER_FL_PROBE;
640         trigger->count = -1;
641         trigger->ops = &eprobe_trigger_ops;
642
643         /*
644          * EVENT PROBE triggers are not registered as commands with
645          * register_event_command(), as they are not controlled by the user
646          * from the trigger file
647          */
648         trigger->cmd_ops = &event_trigger_cmd;
649
650         INIT_LIST_HEAD(&trigger->list);
651         RCU_INIT_POINTER(trigger->filter, NULL);
652
653         edata->file = file;
654         edata->ep = ep;
655         trigger->private_data = edata;
656
657         return trigger;
658 }
659
660 static int enable_eprobe(struct trace_eprobe *ep,
661                          struct trace_event_file *eprobe_file)
662 {
663         struct event_trigger_data *trigger;
664         struct trace_event_file *file;
665         struct trace_array *tr = eprobe_file->tr;
666
667         file = find_event_file(tr, ep->event_system, ep->event_name);
668         if (!file)
669                 return -ENOENT;
670         trigger = new_eprobe_trigger(ep, eprobe_file);
671         if (IS_ERR(trigger))
672                 return PTR_ERR(trigger);
673
674         list_add_tail_rcu(&trigger->list, &file->triggers);
675
676         trace_event_trigger_enable_disable(file, 1);
677         update_cond_flag(file);
678
679         return 0;
680 }
681
682 static struct trace_event_functions eprobe_funcs = {
683         .trace          = print_eprobe_event
684 };
685
686 static int disable_eprobe(struct trace_eprobe *ep,
687                           struct trace_array *tr)
688 {
689         struct event_trigger_data *trigger;
690         struct trace_event_file *file;
691         struct eprobe_data *edata;
692
693         file = find_event_file(tr, ep->event_system, ep->event_name);
694         if (!file)
695                 return -ENOENT;
696
697         list_for_each_entry(trigger, &file->triggers, list) {
698                 if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
699                         continue;
700                 edata = trigger->private_data;
701                 if (edata->ep == ep)
702                         break;
703         }
704         if (list_entry_is_head(trigger, &file->triggers, list))
705                 return -ENODEV;
706
707         list_del_rcu(&trigger->list);
708
709         trace_event_trigger_enable_disable(file, 0);
710         update_cond_flag(file);
711
712         /* Make sure nothing is using the edata or trigger */
713         tracepoint_synchronize_unregister();
714
715         kfree(edata);
716         kfree(trigger);
717
718         return 0;
719 }
720
721 static int enable_trace_eprobe(struct trace_event_call *call,
722                                struct trace_event_file *file)
723 {
724         struct trace_probe *pos, *tp;
725         struct trace_eprobe *ep;
726         bool enabled;
727         int ret = 0;
728         int cnt = 0;
729
730         tp = trace_probe_primary_from_call(call);
731         if (WARN_ON_ONCE(!tp))
732                 return -ENODEV;
733         enabled = trace_probe_is_enabled(tp);
734
735         /* This also changes "enabled" state */
736         if (file) {
737                 ret = trace_probe_add_file(tp, file);
738                 if (ret)
739                         return ret;
740         } else
741                 trace_probe_set_flag(tp, TP_FLAG_PROFILE);
742
743         if (enabled)
744                 return 0;
745
746         list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
747                 ep = container_of(pos, struct trace_eprobe, tp);
748                 ret = enable_eprobe(ep, file);
749                 if (ret)
750                         break;
751                 enabled = true;
752                 cnt++;
753         }
754
755         if (ret) {
756                 /* Failed to enable one of them. Roll back all */
757                 if (enabled) {
758                         /*
759                          * It's a bug if one failed for something other than memory
760                          * not being available but another eprobe succeeded.
761                          */
762                         WARN_ON_ONCE(ret != -ENOMEM);
763
764                         list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
765                                 ep = container_of(pos, struct trace_eprobe, tp);
766                                 disable_eprobe(ep, file->tr);
767                                 if (!--cnt)
768                                         break;
769                         }
770                 }
771                 if (file)
772                         trace_probe_remove_file(tp, file);
773                 else
774                         trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
775         }
776
777         return ret;
778 }
779
780 static int disable_trace_eprobe(struct trace_event_call *call,
781                                 struct trace_event_file *file)
782 {
783         struct trace_probe *pos, *tp;
784         struct trace_eprobe *ep;
785
786         tp = trace_probe_primary_from_call(call);
787         if (WARN_ON_ONCE(!tp))
788                 return -ENODEV;
789
790         if (file) {
791                 if (!trace_probe_get_file_link(tp, file))
792                         return -ENOENT;
793                 if (!trace_probe_has_single_file(tp))
794                         goto out;
795                 trace_probe_clear_flag(tp, TP_FLAG_TRACE);
796         } else
797                 trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
798
799         if (!trace_probe_is_enabled(tp)) {
800                 list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
801                         ep = container_of(pos, struct trace_eprobe, tp);
802                         disable_eprobe(ep, file->tr);
803                 }
804         }
805
806  out:
807         if (file)
808                 /*
809                  * Synchronization is done in below function. For perf event,
810                  * file == NULL and perf_trace_event_unreg() calls
811                  * tracepoint_synchronize_unregister() to ensure synchronize
812                  * event. We don't need to care about it.
813                  */
814                 trace_probe_remove_file(tp, file);
815
816         return 0;
817 }
818
819 static int eprobe_register(struct trace_event_call *event,
820                            enum trace_reg type, void *data)
821 {
822         struct trace_event_file *file = data;
823
824         switch (type) {
825         case TRACE_REG_REGISTER:
826                 return enable_trace_eprobe(event, file);
827         case TRACE_REG_UNREGISTER:
828                 return disable_trace_eprobe(event, file);
829 #ifdef CONFIG_PERF_EVENTS
830         case TRACE_REG_PERF_REGISTER:
831         case TRACE_REG_PERF_UNREGISTER:
832         case TRACE_REG_PERF_OPEN:
833         case TRACE_REG_PERF_CLOSE:
834         case TRACE_REG_PERF_ADD:
835         case TRACE_REG_PERF_DEL:
836                 return 0;
837 #endif
838         }
839         return 0;
840 }
841
842 static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
843 {
844         struct trace_event_call *call = trace_probe_event_call(&ep->tp);
845
846         call->flags = TRACE_EVENT_FL_EPROBE;
847         call->event.funcs = &eprobe_funcs;
848         call->class->fields_array = eprobe_fields_array;
849         call->class->reg = eprobe_register;
850 }
851
852 static struct trace_event_call *
853 find_and_get_event(const char *system, const char *event_name)
854 {
855         struct trace_event_call *tp_event;
856         const char *name;
857
858         list_for_each_entry(tp_event, &ftrace_events, list) {
859                 /* Skip other probes and ftrace events */
860                 if (tp_event->flags &
861                     (TRACE_EVENT_FL_IGNORE_ENABLE |
862                      TRACE_EVENT_FL_KPROBE |
863                      TRACE_EVENT_FL_UPROBE |
864                      TRACE_EVENT_FL_EPROBE))
865                         continue;
866                 if (!tp_event->class->system ||
867                     strcmp(system, tp_event->class->system))
868                         continue;
869                 name = trace_event_name(tp_event);
870                 if (!name || strcmp(event_name, name))
871                         continue;
872                 if (!trace_event_try_get_ref(tp_event)) {
873                         return NULL;
874                         break;
875                 }
876                 return tp_event;
877                 break;
878         }
879         return NULL;
880 }
881
882 static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
883 {
884         unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
885         int ret;
886
887         ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
888         if (ret)
889                 return ret;
890
891         if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG)
892                 ret = trace_eprobe_tp_arg_update(ep, i);
893
894         /* Handle symbols "@" */
895         if (!ret)
896                 ret = traceprobe_update_arg(&ep->tp.args[i]);
897
898         return ret;
899 }
900
901 static int __trace_eprobe_create(int argc, const char *argv[])
902 {
903         /*
904          * Argument syntax:
905          *      e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
906          * Fetch args:
907          *  <name>=$<field>[:TYPE]
908          */
909         const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
910         const char *sys_event = NULL, *sys_name = NULL;
911         struct trace_event_call *event_call;
912         struct trace_eprobe *ep = NULL;
913         char buf1[MAX_EVENT_NAME_LEN];
914         char buf2[MAX_EVENT_NAME_LEN];
915         int ret = 0;
916         int i;
917
918         if (argc < 2 || argv[0][0] != 'e')
919                 return -ECANCELED;
920
921         trace_probe_log_init("event_probe", argc, argv);
922
923         event = strchr(&argv[0][1], ':');
924         if (event) {
925                 event++;
926                 ret = traceprobe_parse_event_name(&event, &group, buf1,
927                                                   event - argv[0]);
928                 if (ret)
929                         goto parse_error;
930         } else {
931                 strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
932                 sanitize_event_name(buf1);
933                 event = buf1;
934         }
935         if (!is_good_name(event) || !is_good_name(group))
936                 goto parse_error;
937
938         sys_event = argv[1];
939         ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
940                                           sys_event - argv[1]);
941         if (ret || !sys_name)
942                 goto parse_error;
943         if (!is_good_name(sys_event) || !is_good_name(sys_name))
944                 goto parse_error;
945
946         mutex_lock(&event_mutex);
947         event_call = find_and_get_event(sys_name, sys_event);
948         ep = alloc_event_probe(group, event, event_call, argc - 2);
949         mutex_unlock(&event_mutex);
950
951         if (IS_ERR(ep)) {
952                 ret = PTR_ERR(ep);
953                 /* This must return -ENOMEM or missing event, else there is a bug */
954                 WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
955                 ep = NULL;
956                 goto error;
957         }
958
959         argc -= 2; argv += 2;
960         /* parse arguments */
961         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
962                 trace_probe_log_set_index(i + 2);
963                 ret = trace_eprobe_tp_update_arg(ep, argv, i);
964                 if (ret)
965                         goto error;
966         }
967         ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
968         if (ret < 0)
969                 goto error;
970         init_trace_eprobe_call(ep);
971         mutex_lock(&event_mutex);
972         ret = trace_probe_register_event_call(&ep->tp);
973         if (ret) {
974                 if (ret == -EEXIST) {
975                         trace_probe_log_set_index(0);
976                         trace_probe_log_err(0, EVENT_EXIST);
977                 }
978                 mutex_unlock(&event_mutex);
979                 goto error;
980         }
981         ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
982         mutex_unlock(&event_mutex);
983         return ret;
984 parse_error:
985         ret = -EINVAL;
986 error:
987         trace_event_probe_cleanup(ep);
988         return ret;
989 }
990
991 /*
992  * Register dynevent at core_initcall. This allows kernel to setup eprobe
993  * events in postcore_initcall without tracefs.
994  */
995 static __init int trace_events_eprobe_init_early(void)
996 {
997         int err = 0;
998
999         err = dyn_event_register(&eprobe_dyn_event_ops);
1000         if (err)
1001                 pr_warn("Could not register eprobe_dyn_event_ops\n");
1002
1003         return err;
1004 }
1005 core_initcall(trace_events_eprobe_init_early);