GNU Linux-libre 5.4.257-gnu1
kernel/trace/trace_events.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * event tracer
4  *
5  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
6  *
7  *  - Added format output of fields of the trace point.
8  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
9  *
10  */
11
12 #define pr_fmt(fmt) fmt
13
14 #include <linux/workqueue.h>
15 #include <linux/security.h>
16 #include <linux/spinlock.h>
17 #include <linux/kthread.h>
18 #include <linux/tracefs.h>
19 #include <linux/uaccess.h>
20 #include <linux/module.h>
21 #include <linux/ctype.h>
22 #include <linux/sort.h>
23 #include <linux/slab.h>
24 #include <linux/delay.h>
25
26 #include <trace/events/sched.h>
27
28 #include <asm/setup.h>
29
30 #include "trace_output.h"
31
32 #undef TRACE_SYSTEM
33 #define TRACE_SYSTEM "TRACE_SYSTEM"
34
35 DEFINE_MUTEX(event_mutex);
36
37 LIST_HEAD(ftrace_events);
38 static LIST_HEAD(ftrace_generic_fields);
39 static LIST_HEAD(ftrace_common_fields);
40
41 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
42
43 static struct kmem_cache *field_cachep;
44 static struct kmem_cache *file_cachep;
45
46 static inline int system_refcount(struct event_subsystem *system)
47 {
48         return system->ref_count;
49 }
50
51 static int system_refcount_inc(struct event_subsystem *system)
52 {
53         return system->ref_count++;
54 }
55
56 static int system_refcount_dec(struct event_subsystem *system)
57 {
58         return --system->ref_count;
59 }
60
61 /* Double loops, do not use break, only gotos work */
62 #define do_for_each_event_file(tr, file)                        \
63         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
64                 list_for_each_entry(file, &tr->events, list)
65
66 #define do_for_each_event_file_safe(tr, file)                   \
67         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
68                 struct trace_event_file *___n;                          \
69                 list_for_each_entry_safe(file, ___n, &tr->events, list)
70
71 #define while_for_each_event_file()             \
72         }
73
74 static struct ftrace_event_field *
75 __find_event_field(struct list_head *head, char *name)
76 {
77         struct ftrace_event_field *field;
78
79         list_for_each_entry(field, head, link) {
80                 if (!strcmp(field->name, name))
81                         return field;
82         }
83
84         return NULL;
85 }
86
87 struct ftrace_event_field *
88 trace_find_event_field(struct trace_event_call *call, char *name)
89 {
90         struct ftrace_event_field *field;
91         struct list_head *head;
92
93         head = trace_get_fields(call);
94         field = __find_event_field(head, name);
95         if (field)
96                 return field;
97
98         field = __find_event_field(&ftrace_generic_fields, name);
99         if (field)
100                 return field;
101
102         return __find_event_field(&ftrace_common_fields, name);
103 }
104
105 static int __trace_define_field(struct list_head *head, const char *type,
106                                 const char *name, int offset, int size,
107                                 int is_signed, int filter_type)
108 {
109         struct ftrace_event_field *field;
110
111         field = kmem_cache_alloc(field_cachep, GFP_TRACE);
112         if (!field)
113                 return -ENOMEM;
114
115         field->name = name;
116         field->type = type;
117
118         if (filter_type == FILTER_OTHER)
119                 field->filter_type = filter_assign_type(type);
120         else
121                 field->filter_type = filter_type;
122
123         field->offset = offset;
124         field->size = size;
125         field->is_signed = is_signed;
126
127         list_add(&field->link, head);
128
129         return 0;
130 }
131
132 int trace_define_field(struct trace_event_call *call, const char *type,
133                        const char *name, int offset, int size, int is_signed,
134                        int filter_type)
135 {
136         struct list_head *head;
137
138         if (WARN_ON(!call->class))
139                 return 0;
140
141         head = trace_get_fields(call);
142         return __trace_define_field(head, type, name, offset, size,
143                                     is_signed, filter_type);
144 }
145 EXPORT_SYMBOL_GPL(trace_define_field);
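
/*
 * A minimal sketch of how an event implementation might call
 * trace_define_field() from its class->define_fields() callback. The
 * entry struct and field names below are hypothetical, for illustration:
 *
 *	struct my_entry {
 *		struct trace_entry	ent;
 *		int			my_val;
 *	};
 *
 *	ret = trace_define_field(call, "int", "my_val",
 *				 offsetof(struct my_entry, my_val),
 *				 sizeof(int), is_signed_type(int),
 *				 FILTER_OTHER);
 */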
146
147 #define __generic_field(type, item, filter_type)                        \
148         ret = __trace_define_field(&ftrace_generic_fields, #type,       \
149                                    #item, 0, 0, is_signed_type(type),   \
150                                    filter_type);                        \
151         if (ret)                                                        \
152                 return ret;
153
154 #define __common_field(type, item)                                      \
155         ret = __trace_define_field(&ftrace_common_fields, #type,        \
156                                    "common_" #item,                     \
157                                    offsetof(typeof(ent), item),         \
158                                    sizeof(ent.item),                    \
159                                    is_signed_type(type), FILTER_OTHER); \
160         if (ret)                                                        \
161                 return ret;
162
163 static int trace_define_generic_fields(void)
164 {
165         int ret;
166
167         __generic_field(int, CPU, FILTER_CPU);
168         __generic_field(int, cpu, FILTER_CPU);
169         __generic_field(int, common_cpu, FILTER_CPU);
170         __generic_field(char *, COMM, FILTER_COMM);
171         __generic_field(char *, comm, FILTER_COMM);
172
173         return ret;
174 }
175
176 static int trace_define_common_fields(void)
177 {
178         int ret;
179         struct trace_entry ent;
180
181         __common_field(unsigned short, type);
182         __common_field(unsigned char, flags);
183         __common_field(unsigned char, preempt_count);
184         __common_field(int, pid);
185
186         return ret;
187 }
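
/*
 * With struct trace_entry laid out as above, the common fields appear at
 * the top of every event's "format" file roughly as:
 *
 *	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
 *	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
 *	field:int common_pid;	offset:4;	size:4;	signed:1;
 */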
188
189 static void trace_destroy_fields(struct trace_event_call *call)
190 {
191         struct ftrace_event_field *field, *next;
192         struct list_head *head;
193
194         head = trace_get_fields(call);
195         list_for_each_entry_safe(field, next, head, link) {
196                 list_del(&field->link);
197                 kmem_cache_free(field_cachep, field);
198         }
199 }
200
201 /*
202  * run-time version of trace_event_get_offsets_<call>() that returns the last
203  * accessible offset of trace fields excluding __dynamic_array bytes
204  */
205 int trace_event_get_offsets(struct trace_event_call *call)
206 {
207         struct ftrace_event_field *tail;
208         struct list_head *head;
209
210         head = trace_get_fields(call);
211         /*
212          * head->next points to the last field with the largest offset,
213          * since it was added last by trace_define_field()
214          */
215         tail = list_first_entry(head, struct ftrace_event_field, link);
216         return tail->offset + tail->size;
217 }
218
219 int trace_event_raw_init(struct trace_event_call *call)
220 {
221         int id;
222
223         id = register_trace_event(&call->event);
224         if (!id)
225                 return -ENODEV;
226
227         return 0;
228 }
229 EXPORT_SYMBOL_GPL(trace_event_raw_init);
230
231 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
232 {
233         struct trace_array *tr = trace_file->tr;
234         struct trace_array_cpu *data;
235         struct trace_pid_list *pid_list;
236
237         pid_list = rcu_dereference_raw(tr->filtered_pids);
238         if (!pid_list)
239                 return false;
240
241         data = this_cpu_ptr(tr->trace_buffer.data);
242
243         return data->ignore_pid;
244 }
245 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
246
247 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
248                                  struct trace_event_file *trace_file,
249                                  unsigned long len)
250 {
251         struct trace_event_call *event_call = trace_file->event_call;
252
253         if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
254             trace_event_ignore_this_pid(trace_file))
255                 return NULL;
256
257         local_save_flags(fbuffer->flags);
258         fbuffer->pc = preempt_count();
259         /*
260          * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
261          * preemption (adding one to the preempt_count). Since we are
262          * interested in the preempt_count at the time the tracepoint was
263          * hit, we need to subtract one to offset the increment.
264          */
265         if (IS_ENABLED(CONFIG_PREEMPTION))
266                 fbuffer->pc--;
267         fbuffer->trace_file = trace_file;
268
269         fbuffer->event =
270                 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
271                                                 event_call->event.type, len,
272                                                 fbuffer->flags, fbuffer->pc);
273         if (!fbuffer->event)
274                 return NULL;
275
276         fbuffer->entry = ring_buffer_event_data(fbuffer->event);
277         return fbuffer->entry;
278 }
279 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
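
/*
 * Sketch of the usual caller pattern (the TRACE_EVENT() generated probes
 * follow this shape; the entry type and field name are illustrative):
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->my_field = my_value;
 *	trace_event_buffer_commit(&fbuffer);
 */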
280
281 int trace_event_reg(struct trace_event_call *call,
282                     enum trace_reg type, void *data)
283 {
284         struct trace_event_file *file = data;
285
286         WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
287         switch (type) {
288         case TRACE_REG_REGISTER:
289                 return tracepoint_probe_register(call->tp,
290                                                  call->class->probe,
291                                                  file);
292         case TRACE_REG_UNREGISTER:
293                 tracepoint_probe_unregister(call->tp,
294                                             call->class->probe,
295                                             file);
296                 return 0;
297
298 #ifdef CONFIG_PERF_EVENTS
299         case TRACE_REG_PERF_REGISTER:
300                 return tracepoint_probe_register(call->tp,
301                                                  call->class->perf_probe,
302                                                  call);
303         case TRACE_REG_PERF_UNREGISTER:
304                 tracepoint_probe_unregister(call->tp,
305                                             call->class->perf_probe,
306                                             call);
307                 return 0;
308         case TRACE_REG_PERF_OPEN:
309         case TRACE_REG_PERF_CLOSE:
310         case TRACE_REG_PERF_ADD:
311         case TRACE_REG_PERF_DEL:
312                 return 0;
313 #endif
314         }
315         return 0;
316 }
317 EXPORT_SYMBOL_GPL(trace_event_reg);
318
319 void trace_event_enable_cmd_record(bool enable)
320 {
321         struct trace_event_file *file;
322         struct trace_array *tr;
323
324         lockdep_assert_held(&event_mutex);
325
326         do_for_each_event_file(tr, file) {
327
328                 if (!(file->flags & EVENT_FILE_FL_ENABLED))
329                         continue;
330
331                 if (enable) {
332                         tracing_start_cmdline_record();
333                         set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
334                 } else {
335                         tracing_stop_cmdline_record();
336                         clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
337                 }
338         } while_for_each_event_file();
339 }
340
341 void trace_event_enable_tgid_record(bool enable)
342 {
343         struct trace_event_file *file;
344         struct trace_array *tr;
345
346         lockdep_assert_held(&event_mutex);
347
348         do_for_each_event_file(tr, file) {
349                 if (!(file->flags & EVENT_FILE_FL_ENABLED))
350                         continue;
351
352                 if (enable) {
353                         tracing_start_tgid_record();
354                         set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
355                 } else {
356                         tracing_stop_tgid_record();
357                         clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
358                                   &file->flags);
359                 }
360         } while_for_each_event_file();
361 }
362
363 static int __ftrace_event_enable_disable(struct trace_event_file *file,
364                                          int enable, int soft_disable)
365 {
366         struct trace_event_call *call = file->event_call;
367         struct trace_array *tr = file->tr;
368         int ret = 0;
369         int disable;
370
371         switch (enable) {
372         case 0:
373                 /*
374                  * When soft_disable is set and enable is cleared, the sm_ref
375                  * reference counter is decremented. If it reaches 0, we want
376                  * to clear the SOFT_DISABLED flag but leave the event in the
377                  * state that it was. That is, if the event was enabled and
378                  * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
379                  * is set we do not want the event to be enabled before we
380                  * clear the bit.
381                  *
382                  * When soft_disable is not set but the SOFT_MODE flag is,
383                  * we do nothing. Do not disable the tracepoint, otherwise
384                  * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
385                  */
386                 if (soft_disable) {
387                         if (atomic_dec_return(&file->sm_ref) > 0)
388                                 break;
389                         disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
390                         clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
391                         /* Disable use of trace_buffered_event */
392                         trace_buffered_event_disable();
393                 } else
394                         disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
395
396                 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
397                         clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
398                         if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
399                                 tracing_stop_cmdline_record();
400                                 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
401                         }
402
403                         if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
404                                 tracing_stop_tgid_record();
405                                 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
406                         }
407
408                         call->class->reg(call, TRACE_REG_UNREGISTER, file);
409                 }
410                 /* If in SOFT_MODE, just set the SOFT_DISABLED_BIT, else clear it */
411                 if (file->flags & EVENT_FILE_FL_SOFT_MODE)
412                         set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
413                 else
414                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
415                 break;
416         case 1:
417                 /*
418                  * When soft_disable is set and enable is set, we want to
419                  * register the tracepoint for the event, but leave the event
420                  * as is. That means, if the event was already enabled, we do
421                  * nothing (but set SOFT_MODE). If the event is disabled, we
422                  * set SOFT_DISABLED before enabling the event tracepoint, so
423                  * it still seems to be disabled.
424                  */
425                 if (!soft_disable)
426                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
427                 else {
428                         if (atomic_inc_return(&file->sm_ref) > 1)
429                                 break;
430                         set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
431                         /* Enable use of trace_buffered_event */
432                         trace_buffered_event_enable();
433                 }
434
435                 if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
436                         bool cmd = false, tgid = false;
437
438                         /* Keep the event disabled, when going to SOFT_MODE. */
439                         if (soft_disable)
440                                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
441
442                         if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
443                                 cmd = true;
444                                 tracing_start_cmdline_record();
445                                 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
446                         }
447
448                         if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
449                                 tgid = true;
450                                 tracing_start_tgid_record();
451                                 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
452                         }
453
454                         ret = call->class->reg(call, TRACE_REG_REGISTER, file);
455                         if (ret) {
456                                 if (cmd)
457                                         tracing_stop_cmdline_record();
458                                 if (tgid)
459                                         tracing_stop_tgid_record();
460                                 pr_info("event trace: Could not enable event "
461                                         "%s\n", trace_event_name(call));
462                                 break;
463                         }
464                         set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
465
466                         /* WAS_ENABLED gets set but never cleared. */
467                         set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
468                 }
469                 break;
470         }
471
472         return ret;
473 }
474
475 int trace_event_enable_disable(struct trace_event_file *file,
476                                int enable, int soft_disable)
477 {
478         return __ftrace_event_enable_disable(file, enable, soft_disable);
479 }
480
481 static int ftrace_event_enable_disable(struct trace_event_file *file,
482                                        int enable)
483 {
484         return __ftrace_event_enable_disable(file, enable, 0);
485 }
486
487 static void ftrace_clear_events(struct trace_array *tr)
488 {
489         struct trace_event_file *file;
490
491         mutex_lock(&event_mutex);
492         list_for_each_entry(file, &tr->events, list) {
493                 ftrace_event_enable_disable(file, 0);
494         }
495         mutex_unlock(&event_mutex);
496 }
497
498 static void
499 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
500 {
501         struct trace_pid_list *pid_list;
502         struct trace_array *tr = data;
503
504         pid_list = rcu_dereference_raw(tr->filtered_pids);
505         trace_filter_add_remove_task(pid_list, NULL, task);
506 }
507
508 static void
509 event_filter_pid_sched_process_fork(void *data,
510                                     struct task_struct *self,
511                                     struct task_struct *task)
512 {
513         struct trace_pid_list *pid_list;
514         struct trace_array *tr = data;
515
516         pid_list = rcu_dereference_sched(tr->filtered_pids);
517         trace_filter_add_remove_task(pid_list, self, task);
518 }
519
520 void trace_event_follow_fork(struct trace_array *tr, bool enable)
521 {
522         if (enable) {
523                 register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
524                                                        tr, INT_MIN);
525                 register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
526                                                        tr, INT_MAX);
527         } else {
528                 unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
529                                                     tr);
530                 unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
531                                                     tr);
532         }
533 }
534
535 static void
536 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
537                     struct task_struct *prev, struct task_struct *next)
538 {
539         struct trace_array *tr = data;
540         struct trace_pid_list *pid_list;
541
542         pid_list = rcu_dereference_sched(tr->filtered_pids);
543
544         this_cpu_write(tr->trace_buffer.data->ignore_pid,
545                        trace_ignore_this_task(pid_list, prev) &&
546                        trace_ignore_this_task(pid_list, next));
547 }
548
549 static void
550 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
551                     struct task_struct *prev, struct task_struct *next)
552 {
553         struct trace_array *tr = data;
554         struct trace_pid_list *pid_list;
555
556         pid_list = rcu_dereference_sched(tr->filtered_pids);
557
558         this_cpu_write(tr->trace_buffer.data->ignore_pid,
559                        trace_ignore_this_task(pid_list, next));
560 }
561
562 static void
563 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
564 {
565         struct trace_array *tr = data;
566         struct trace_pid_list *pid_list;
567
568         /* Nothing to do if we are already tracing */
569         if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
570                 return;
571
572         pid_list = rcu_dereference_sched(tr->filtered_pids);
573
574         this_cpu_write(tr->trace_buffer.data->ignore_pid,
575                        trace_ignore_this_task(pid_list, task));
576 }
577
578 static void
579 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
580 {
581         struct trace_array *tr = data;
582         struct trace_pid_list *pid_list;
583
584         /* Nothing to do if we are not tracing */
585         if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
586                 return;
587
588         pid_list = rcu_dereference_sched(tr->filtered_pids);
589
590         /* Set tracing if current is enabled */
591         this_cpu_write(tr->trace_buffer.data->ignore_pid,
592                        trace_ignore_this_task(pid_list, current));
593 }
594
595 static void __ftrace_clear_event_pids(struct trace_array *tr)
596 {
597         struct trace_pid_list *pid_list;
598         struct trace_event_file *file;
599         int cpu;
600
601         pid_list = rcu_dereference_protected(tr->filtered_pids,
602                                              lockdep_is_held(&event_mutex));
603         if (!pid_list)
604                 return;
605
606         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
607         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
608
609         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
610         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
611
612         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
613         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
614
615         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
616         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
617
618         list_for_each_entry(file, &tr->events, list) {
619                 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
620         }
621
622         for_each_possible_cpu(cpu)
623                 per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
624
625         rcu_assign_pointer(tr->filtered_pids, NULL);
626
627         /* Wait till all users are no longer using pid filtering */
628         tracepoint_synchronize_unregister();
629
630         trace_free_pid_list(pid_list);
631 }
632
633 static void ftrace_clear_event_pids(struct trace_array *tr)
634 {
635         mutex_lock(&event_mutex);
636         __ftrace_clear_event_pids(tr);
637         mutex_unlock(&event_mutex);
638 }
639
640 static void __put_system(struct event_subsystem *system)
641 {
642         struct event_filter *filter = system->filter;
643
644         WARN_ON_ONCE(system_refcount(system) == 0);
645         if (system_refcount_dec(system))
646                 return;
647
648         list_del(&system->list);
649
650         if (filter) {
651                 kfree(filter->filter_string);
652                 kfree(filter);
653         }
654         kfree_const(system->name);
655         kfree(system);
656 }
657
658 static void __get_system(struct event_subsystem *system)
659 {
660         WARN_ON_ONCE(system_refcount(system) == 0);
661         system_refcount_inc(system);
662 }
663
664 static void __get_system_dir(struct trace_subsystem_dir *dir)
665 {
666         WARN_ON_ONCE(dir->ref_count == 0);
667         dir->ref_count++;
668         __get_system(dir->subsystem);
669 }
670
671 static void __put_system_dir(struct trace_subsystem_dir *dir)
672 {
673         WARN_ON_ONCE(dir->ref_count == 0);
674         /* If the subsystem is about to be freed, the dir must be too */
675         WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
676
677         __put_system(dir->subsystem);
678         if (!--dir->ref_count)
679                 kfree(dir);
680 }
681
682 static void put_system(struct trace_subsystem_dir *dir)
683 {
684         mutex_lock(&event_mutex);
685         __put_system_dir(dir);
686         mutex_unlock(&event_mutex);
687 }
688
689 static void remove_subsystem(struct trace_subsystem_dir *dir)
690 {
691         if (!dir)
692                 return;
693
694         if (!--dir->nr_events) {
695                 tracefs_remove_recursive(dir->entry);
696                 list_del(&dir->list);
697                 __put_system_dir(dir);
698         }
699 }
700
701 static void remove_event_file_dir(struct trace_event_file *file)
702 {
703         struct dentry *dir = file->dir;
704         struct dentry *child;
705
706         if (dir) {
707                 spin_lock(&dir->d_lock);        /* probably unneeded */
708                 list_for_each_entry(child, &dir->d_subdirs, d_child) {
709                         if (d_really_is_positive(child))        /* probably unneeded */
710                                 d_inode(child)->i_private = NULL;
711                 }
712                 spin_unlock(&dir->d_lock);
713
714                 tracefs_remove_recursive(dir);
715         }
716
717         list_del(&file->list);
718         remove_subsystem(file->system);
719         free_event_filter(file->filter);
720         kmem_cache_free(file_cachep, file);
721 }
722
723 /*
724  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
725  */
726 static int
727 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
728                               const char *sub, const char *event, int set)
729 {
730         struct trace_event_file *file;
731         struct trace_event_call *call;
732         const char *name;
733         int ret = -EINVAL;
734         int eret = 0;
735
736         list_for_each_entry(file, &tr->events, list) {
737
738                 call = file->event_call;
739                 name = trace_event_name(call);
740
741                 if (!name || !call->class || !call->class->reg)
742                         continue;
743
744                 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
745                         continue;
746
747                 if (match &&
748                     strcmp(match, name) != 0 &&
749                     strcmp(match, call->class->system) != 0)
750                         continue;
751
752                 if (sub && strcmp(sub, call->class->system) != 0)
753                         continue;
754
755                 if (event && strcmp(event, name) != 0)
756                         continue;
757
758                 ret = ftrace_event_enable_disable(file, set);
759
760                 /*
761                  * Save the first error and return that. Some events
762                  * may still have been enabled, but let the user
763                  * know that something went wrong.
764                  */
765                 if (ret && !eret)
766                         eret = ret;
767
768                 ret = eret;
769         }
770
771         return ret;
772 }
773
774 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
775                                   const char *sub, const char *event, int set)
776 {
777         int ret;
778
779         mutex_lock(&event_mutex);
780         ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
781         mutex_unlock(&event_mutex);
782
783         return ret;
784 }
785
786 int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
787 {
788         char *event = NULL, *sub = NULL, *match;
789         int ret;
790
791         if (!tr)
792                 return -ENOENT;
793         /*
794          * The buf format can be <subsystem>:<event-name>
795          *  *:<event-name> means any event by that name.
796          *  :<event-name> is the same.
797          *
798          *  <subsystem>:* means all events in that subsystem
799          *  <subsystem>: means the same.
800          *
801          *  <name> (no ':') means all events in a subsystem with
802          *  the name <name> or any event that matches <name>
803          */
804
805         match = strsep(&buf, ":");
806         if (buf) {
807                 sub = match;
808                 event = buf;
809                 match = NULL;
810
811                 if (!strlen(sub) || strcmp(sub, "*") == 0)
812                         sub = NULL;
813                 if (!strlen(event) || strcmp(event, "*") == 0)
814                         event = NULL;
815         }
816
817         ret = __ftrace_set_clr_event(tr, match, sub, event, set);
818
819         /* Put back the colon to allow this to be called again */
820         if (buf)
821                 *(buf - 1) = ':';
822
823         return ret;
824 }
825 EXPORT_SYMBOL_GPL(ftrace_set_clr_event);
826
827 /**
828  * trace_set_clr_event - enable or disable an event
829  * @system: system name to match (NULL for any system)
830  * @event: event name to match (NULL for all events, within system)
831  * @set: 1 to enable, 0 to disable
832  *
833  * This is a way for other parts of the kernel to enable or disable
834  * event recording.
835  *
836  * Returns 0 on success, -EINVAL if the parameters do not match any
837  * registered events.
838  */
839 int trace_set_clr_event(const char *system, const char *event, int set)
840 {
841         struct trace_array *tr = top_trace_array();
842
843         if (!tr)
844                 return -ENODEV;
845
846         return __ftrace_set_clr_event(tr, NULL, system, event, set);
847 }
848 EXPORT_SYMBOL_GPL(trace_set_clr_event);
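
/*
 * Illustrative in-kernel usage (the subsystem and event names are only
 * examples):
 *
 *	trace_set_clr_event("sched", "sched_switch", 1);   enable one event
 *	trace_set_clr_event("sched", NULL, 0);              disable all "sched" events
 */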
849
850 /* 128 should be much more than enough */
851 #define EVENT_BUF_SIZE          127
852
853 static ssize_t
854 ftrace_event_write(struct file *file, const char __user *ubuf,
855                    size_t cnt, loff_t *ppos)
856 {
857         struct trace_parser parser;
858         struct seq_file *m = file->private_data;
859         struct trace_array *tr = m->private;
860         ssize_t read, ret;
861
862         if (!cnt)
863                 return 0;
864
865         ret = tracing_update_buffers();
866         if (ret < 0)
867                 return ret;
868
869         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
870                 return -ENOMEM;
871
872         read = trace_get_user(&parser, ubuf, cnt, ppos);
873
874         if (read >= 0 && trace_parser_loaded((&parser))) {
875                 int set = 1;
876
877                 if (*parser.buffer == '!')
878                         set = 0;
879
880                 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
881                 if (ret)
882                         goto out_put;
883         }
884
885         ret = read;
886
887  out_put:
888         trace_parser_put(&parser);
889
890         return ret;
891 }
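
/*
 * This is what backs writes to the tracefs "set_event" file, e.g.
 * (paths and event names are illustrative):
 *
 *	echo 'sched:sched_switch'  > /sys/kernel/tracing/set_event
 *	echo '!sched:sched_switch' > /sys/kernel/tracing/set_event
 *
 * A leading '!' clears the matching event(s) instead of setting them.
 */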
892
893 static void *
894 t_next(struct seq_file *m, void *v, loff_t *pos)
895 {
896         struct trace_event_file *file = v;
897         struct trace_event_call *call;
898         struct trace_array *tr = m->private;
899
900         (*pos)++;
901
902         list_for_each_entry_continue(file, &tr->events, list) {
903                 call = file->event_call;
904                 /*
905                  * The ftrace subsystem is for showing formats only.
906                  * They can not be enabled or disabled via the event files.
907                  */
908                 if (call->class && call->class->reg &&
909                     !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
910                         return file;
911         }
912
913         return NULL;
914 }
915
916 static void *t_start(struct seq_file *m, loff_t *pos)
917 {
918         struct trace_event_file *file;
919         struct trace_array *tr = m->private;
920         loff_t l;
921
922         mutex_lock(&event_mutex);
923
924         file = list_entry(&tr->events, struct trace_event_file, list);
925         for (l = 0; l <= *pos; ) {
926                 file = t_next(m, file, &l);
927                 if (!file)
928                         break;
929         }
930         return file;
931 }
932
933 static void *
934 s_next(struct seq_file *m, void *v, loff_t *pos)
935 {
936         struct trace_event_file *file = v;
937         struct trace_array *tr = m->private;
938
939         (*pos)++;
940
941         list_for_each_entry_continue(file, &tr->events, list) {
942                 if (file->flags & EVENT_FILE_FL_ENABLED)
943                         return file;
944         }
945
946         return NULL;
947 }
948
949 static void *s_start(struct seq_file *m, loff_t *pos)
950 {
951         struct trace_event_file *file;
952         struct trace_array *tr = m->private;
953         loff_t l;
954
955         mutex_lock(&event_mutex);
956
957         file = list_entry(&tr->events, struct trace_event_file, list);
958         for (l = 0; l <= *pos; ) {
959                 file = s_next(m, file, &l);
960                 if (!file)
961                         break;
962         }
963         return file;
964 }
965
966 static int t_show(struct seq_file *m, void *v)
967 {
968         struct trace_event_file *file = v;
969         struct trace_event_call *call = file->event_call;
970
971         if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
972                 seq_printf(m, "%s:", call->class->system);
973         seq_printf(m, "%s\n", trace_event_name(call));
974
975         return 0;
976 }
977
978 static void t_stop(struct seq_file *m, void *p)
979 {
980         mutex_unlock(&event_mutex);
981 }
982
983 static void *
984 p_next(struct seq_file *m, void *v, loff_t *pos)
985 {
986         struct trace_array *tr = m->private;
987         struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
988
989         return trace_pid_next(pid_list, v, pos);
990 }
991
992 static void *p_start(struct seq_file *m, loff_t *pos)
993         __acquires(RCU)
994 {
995         struct trace_pid_list *pid_list;
996         struct trace_array *tr = m->private;
997
998         /*
999          * Grab the mutex so that calls to p_next() see the same
1000          * tr->filtered_pids as p_start() did.
1001          * If we just passed the tr->filtered_pids around, then RCU would
1002          * have been enough, but doing that makes things more complex.
1003          */
1004         mutex_lock(&event_mutex);
1005         rcu_read_lock_sched();
1006
1007         pid_list = rcu_dereference_sched(tr->filtered_pids);
1008
1009         if (!pid_list)
1010                 return NULL;
1011
1012         return trace_pid_start(pid_list, pos);
1013 }
1014
1015 static void p_stop(struct seq_file *m, void *p)
1016         __releases(RCU)
1017 {
1018         rcu_read_unlock_sched();
1019         mutex_unlock(&event_mutex);
1020 }
1021
1022 static ssize_t
1023 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1024                   loff_t *ppos)
1025 {
1026         struct trace_event_file *file;
1027         unsigned long flags;
1028         char buf[4] = "0";
1029
1030         mutex_lock(&event_mutex);
1031         file = event_file_data(filp);
1032         if (likely(file))
1033                 flags = file->flags;
1034         mutex_unlock(&event_mutex);
1035
1036         if (!file)
1037                 return -ENODEV;
1038
1039         if (flags & EVENT_FILE_FL_ENABLED &&
1040             !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1041                 strcpy(buf, "1");
1042
1043         if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1044             flags & EVENT_FILE_FL_SOFT_MODE)
1045                 strcat(buf, "*");
1046
1047         strcat(buf, "\n");
1048
1049         return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1050 }
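
/*
 * A read of an event's "enable" file therefore returns one of:
 *
 *	"0\n"	event disabled
 *	"1\n"	event enabled
 *	"0*\n"	soft disabled, or soft mode while not fully enabled
 *	"1*\n"	enabled, with soft mode also active
 */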
1051
1052 static ssize_t
1053 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1054                    loff_t *ppos)
1055 {
1056         struct trace_event_file *file;
1057         unsigned long val;
1058         int ret;
1059
1060         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1061         if (ret)
1062                 return ret;
1063
1064         ret = tracing_update_buffers();
1065         if (ret < 0)
1066                 return ret;
1067
1068         switch (val) {
1069         case 0:
1070         case 1:
1071                 ret = -ENODEV;
1072                 mutex_lock(&event_mutex);
1073                 file = event_file_data(filp);
1074                 if (likely(file))
1075                         ret = ftrace_event_enable_disable(file, val);
1076                 mutex_unlock(&event_mutex);
1077                 break;
1078
1079         default:
1080                 return -EINVAL;
1081         }
1082
1083         *ppos += cnt;
1084
1085         return ret ? ret : cnt;
1086 }
1087
1088 static ssize_t
1089 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1090                    loff_t *ppos)
1091 {
1092         const char set_to_char[4] = { '?', '0', '1', 'X' };
1093         struct trace_subsystem_dir *dir = filp->private_data;
1094         struct event_subsystem *system = dir->subsystem;
1095         struct trace_event_call *call;
1096         struct trace_event_file *file;
1097         struct trace_array *tr = dir->tr;
1098         char buf[2];
1099         int set = 0;
1100         int ret;
1101
1102         mutex_lock(&event_mutex);
1103         list_for_each_entry(file, &tr->events, list) {
1104                 call = file->event_call;
1105                 if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
1106                     !trace_event_name(call) || !call->class || !call->class->reg)
1107                         continue;
1108
1109                 if (system && strcmp(call->class->system, system->name) != 0)
1110                         continue;
1111
1112                 /*
1113                  * We need to find out if all the events are set
1114                  * or if all events are cleared, or if we have
1115                  * a mixture.
1116                  */
1117                 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1118
1119                 /*
1120                  * If we have a mixture, no need to look further.
1121                  */
1122                 if (set == 3)
1123                         break;
1124         }
1125         mutex_unlock(&event_mutex);
1126
1127         buf[0] = set_to_char[set];
1128         buf[1] = '\n';
1129
1130         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1131
1132         return ret;
1133 }
1134
1135 static ssize_t
1136 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1137                     loff_t *ppos)
1138 {
1139         struct trace_subsystem_dir *dir = filp->private_data;
1140         struct event_subsystem *system = dir->subsystem;
1141         const char *name = NULL;
1142         unsigned long val;
1143         ssize_t ret;
1144
1145         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1146         if (ret)
1147                 return ret;
1148
1149         ret = tracing_update_buffers();
1150         if (ret < 0)
1151                 return ret;
1152
1153         if (val != 0 && val != 1)
1154                 return -EINVAL;
1155
1156         /*
1157          * Opening of "enable" adds a ref count to system,
1158          * so the name is safe to use.
1159          */
1160         if (system)
1161                 name = system->name;
1162
1163         ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1164         if (ret)
1165                 goto out;
1166
1167         ret = cnt;
1168
1169 out:
1170         *ppos += cnt;
1171
1172         return ret;
1173 }
1174
1175 enum {
1176         FORMAT_HEADER           = 1,
1177         FORMAT_FIELD_SEPERATOR  = 2,
1178         FORMAT_PRINTFMT         = 3,
1179 };
1180
1181 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1182 {
1183         struct trace_event_call *call = event_file_data(m->private);
1184         struct list_head *common_head = &ftrace_common_fields;
1185         struct list_head *head = trace_get_fields(call);
1186         struct list_head *node = v;
1187
1188         (*pos)++;
1189
1190         switch ((unsigned long)v) {
1191         case FORMAT_HEADER:
1192                 node = common_head;
1193                 break;
1194
1195         case FORMAT_FIELD_SEPERATOR:
1196                 node = head;
1197                 break;
1198
1199         case FORMAT_PRINTFMT:
1200                 /* all done */
1201                 return NULL;
1202         }
1203
1204         node = node->prev;
1205         if (node == common_head)
1206                 return (void *)FORMAT_FIELD_SEPERATOR;
1207         else if (node == head)
1208                 return (void *)FORMAT_PRINTFMT;
1209         else
1210                 return node;
1211 }
1212
1213 static int f_show(struct seq_file *m, void *v)
1214 {
1215         struct trace_event_call *call = event_file_data(m->private);
1216         struct ftrace_event_field *field;
1217         const char *array_descriptor;
1218
1219         switch ((unsigned long)v) {
1220         case FORMAT_HEADER:
1221                 seq_printf(m, "name: %s\n", trace_event_name(call));
1222                 seq_printf(m, "ID: %d\n", call->event.type);
1223                 seq_puts(m, "format:\n");
1224                 return 0;
1225
1226         case FORMAT_FIELD_SEPERATOR:
1227                 seq_putc(m, '\n');
1228                 return 0;
1229
1230         case FORMAT_PRINTFMT:
1231                 seq_printf(m, "\nprint fmt: %s\n",
1232                            call->print_fmt);
1233                 return 0;
1234         }
1235
1236         field = list_entry(v, struct ftrace_event_field, link);
1237         /*
1238          * Show the array type (except for dynamic arrays).
1239          * Normal:
1240          *      field:TYPE VAR
1241          * If TYPE := TYPE[LEN], it is shown:
1242          *      field:TYPE VAR[LEN]
1243          */
1244         array_descriptor = strchr(field->type, '[');
1245
1246         if (str_has_prefix(field->type, "__data_loc"))
1247                 array_descriptor = NULL;
1248
1249         if (!array_descriptor)
1250                 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1251                            field->type, field->name, field->offset,
1252                            field->size, !!field->is_signed);
1253         else
1254                 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1255                            (int)(array_descriptor - field->type),
1256                            field->type, field->name,
1257                            array_descriptor, field->offset,
1258                            field->size, !!field->is_signed);
1259
1260         return 0;
1261 }
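
/*
 * Together, f_start(), f_next() and f_show() render an event's "format"
 * file, roughly (the values shown are illustrative):
 *
 *	name: sched_wakeup
 *	ID: 315
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		...per-event fields...
 *
 *	print fmt: "comm=%s pid=%d prio=%d target_cpu=%03d", ...
 */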
1262
1263 static void *f_start(struct seq_file *m, loff_t *pos)
1264 {
1265         void *p = (void *)FORMAT_HEADER;
1266         loff_t l = 0;
1267
1268         /* ->stop() is called even if ->start() fails */
1269         mutex_lock(&event_mutex);
1270         if (!event_file_data(m->private))
1271                 return ERR_PTR(-ENODEV);
1272
1273         while (l < *pos && p)
1274                 p = f_next(m, p, &l);
1275
1276         return p;
1277 }
1278
1279 static void f_stop(struct seq_file *m, void *p)
1280 {
1281         mutex_unlock(&event_mutex);
1282 }
1283
1284 static const struct seq_operations trace_format_seq_ops = {
1285         .start          = f_start,
1286         .next           = f_next,
1287         .stop           = f_stop,
1288         .show           = f_show,
1289 };
1290
1291 static int trace_format_open(struct inode *inode, struct file *file)
1292 {
1293         struct seq_file *m;
1294         int ret;
1295
1296         /* Do we want to hide event format files on tracefs lockdown? */
1297
1298         ret = seq_open(file, &trace_format_seq_ops);
1299         if (ret < 0)
1300                 return ret;
1301
1302         m = file->private_data;
1303         m->private = file;
1304
1305         return 0;
1306 }
1307
1308 static ssize_t
1309 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1310 {
1311         int id = (long)event_file_data(filp);
1312         char buf[32];
1313         int len;
1314
1315         if (unlikely(!id))
1316                 return -ENODEV;
1317
1318         len = sprintf(buf, "%d\n", id);
1319
1320         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1321 }
1322
1323 static ssize_t
1324 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1325                   loff_t *ppos)
1326 {
1327         struct trace_event_file *file;
1328         struct trace_seq *s;
1329         int r = -ENODEV;
1330
1331         if (*ppos)
1332                 return 0;
1333
1334         s = kmalloc(sizeof(*s), GFP_KERNEL);
1335
1336         if (!s)
1337                 return -ENOMEM;
1338
1339         trace_seq_init(s);
1340
1341         mutex_lock(&event_mutex);
1342         file = event_file_data(filp);
1343         if (file)
1344                 print_event_filter(file, s);
1345         mutex_unlock(&event_mutex);
1346
1347         if (file)
1348                 r = simple_read_from_buffer(ubuf, cnt, ppos,
1349                                             s->buffer, trace_seq_used(s));
1350
1351         kfree(s);
1352
1353         return r;
1354 }
1355
1356 static ssize_t
1357 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1358                    loff_t *ppos)
1359 {
1360         struct trace_event_file *file;
1361         char *buf;
1362         int err = -ENODEV;
1363
1364         if (cnt >= PAGE_SIZE)
1365                 return -EINVAL;
1366
1367         buf = memdup_user_nul(ubuf, cnt);
1368         if (IS_ERR(buf))
1369                 return PTR_ERR(buf);
1370
1371         mutex_lock(&event_mutex);
1372         file = event_file_data(filp);
1373         if (file)
1374                 err = apply_event_filter(file, buf);
1375         mutex_unlock(&event_mutex);
1376
1377         kfree(buf);
1378         if (err < 0)
1379                 return err;
1380
1381         *ppos += cnt;
1382
1383         return cnt;
1384 }
1385
1386 static LIST_HEAD(event_subsystems);
1387
1388 static int subsystem_open(struct inode *inode, struct file *filp)
1389 {
1390         struct event_subsystem *system = NULL;
1391         struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1392         struct trace_array *tr;
1393         int ret;
1394
1395         if (tracing_is_disabled())
1396                 return -ENODEV;
1397
1398         /* Make sure the system still exists */
1399         mutex_lock(&event_mutex);
1400         mutex_lock(&trace_types_lock);
1401         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1402                 list_for_each_entry(dir, &tr->systems, list) {
1403                         if (dir == inode->i_private) {
1404                                 /* Don't open systems with no events */
1405                                 if (dir->nr_events) {
1406                                         __get_system_dir(dir);
1407                                         system = dir->subsystem;
1408                                 }
1409                                 goto exit_loop;
1410                         }
1411                 }
1412         }
1413  exit_loop:
1414         mutex_unlock(&trace_types_lock);
1415         mutex_unlock(&event_mutex);
1416
1417         if (!system)
1418                 return -ENODEV;
1419
1420         /* Some versions of gcc think dir can be uninitialized here */
1421         WARN_ON(!dir);
1422
1423         /* Still need to increment the ref count of the system */
1424         if (trace_array_get(tr) < 0) {
1425                 put_system(dir);
1426                 return -ENODEV;
1427         }
1428
1429         ret = tracing_open_generic(inode, filp);
1430         if (ret < 0) {
1431                 trace_array_put(tr);
1432                 put_system(dir);
1433         }
1434
1435         return ret;
1436 }
1437
1438 static int system_tr_open(struct inode *inode, struct file *filp)
1439 {
1440         struct trace_subsystem_dir *dir;
1441         struct trace_array *tr = inode->i_private;
1442         int ret;
1443
1444         /* Make a temporary dir that has no system but points to tr */
1445         dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1446         if (!dir)
1447                 return -ENOMEM;
1448
1449         ret = tracing_open_generic_tr(inode, filp);
1450         if (ret < 0) {
1451                 kfree(dir);
1452                 return ret;
1453         }
1454         dir->tr = tr;
1455         filp->private_data = dir;
1456
1457         return 0;
1458 }
1459
1460 static int subsystem_release(struct inode *inode, struct file *file)
1461 {
1462         struct trace_subsystem_dir *dir = file->private_data;
1463
1464         trace_array_put(dir->tr);
1465
1466         /*
1467          * If dir->subsystem is NULL, then this is a temporary
1468          * descriptor that was made for a trace_array to enable
1469          * all subsystems.
1470          */
1471         if (dir->subsystem)
1472                 put_system(dir);
1473         else
1474                 kfree(dir);
1475
1476         return 0;
1477 }
1478
1479 static ssize_t
1480 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1481                       loff_t *ppos)
1482 {
1483         struct trace_subsystem_dir *dir = filp->private_data;
1484         struct event_subsystem *system = dir->subsystem;
1485         struct trace_seq *s;
1486         int r;
1487
1488         if (*ppos)
1489                 return 0;
1490
1491         s = kmalloc(sizeof(*s), GFP_KERNEL);
1492         if (!s)
1493                 return -ENOMEM;
1494
1495         trace_seq_init(s);
1496
1497         print_subsystem_event_filter(system, s);
1498         r = simple_read_from_buffer(ubuf, cnt, ppos,
1499                                     s->buffer, trace_seq_used(s));
1500
1501         kfree(s);
1502
1503         return r;
1504 }
1505
1506 static ssize_t
1507 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1508                        loff_t *ppos)
1509 {
1510         struct trace_subsystem_dir *dir = filp->private_data;
1511         char *buf;
1512         int err;
1513
1514         if (cnt >= PAGE_SIZE)
1515                 return -EINVAL;
1516
1517         buf = memdup_user_nul(ubuf, cnt);
1518         if (IS_ERR(buf))
1519                 return PTR_ERR(buf);
1520
1521         err = apply_subsystem_event_filter(dir, buf);
1522         kfree(buf);
1523         if (err < 0)
1524                 return err;
1525
1526         *ppos += cnt;
1527
1528         return cnt;
1529 }
1530
1531 static ssize_t
1532 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1533 {
1534         int (*func)(struct trace_seq *s) = filp->private_data;
1535         struct trace_seq *s;
1536         int r;
1537
1538         if (*ppos)
1539                 return 0;
1540
1541         s = kmalloc(sizeof(*s), GFP_KERNEL);
1542         if (!s)
1543                 return -ENOMEM;
1544
1545         trace_seq_init(s);
1546
1547         func(s);
1548         r = simple_read_from_buffer(ubuf, cnt, ppos,
1549                                     s->buffer, trace_seq_used(s));
1550
1551         kfree(s);
1552
1553         return r;
1554 }
1555
1556 static void ignore_task_cpu(void *data)
1557 {
1558         struct trace_array *tr = data;
1559         struct trace_pid_list *pid_list;
1560
1561         /*
1562          * This function is called by on_each_cpu() while the
1563          * event_mutex is held.
1564          */
1565         pid_list = rcu_dereference_protected(tr->filtered_pids,
1566                                              mutex_is_locked(&event_mutex));
1567
1568         this_cpu_write(tr->trace_buffer.data->ignore_pid,
1569                        trace_ignore_this_task(pid_list, current));
1570 }
1571
1572 static ssize_t
1573 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1574                        size_t cnt, loff_t *ppos)
1575 {
1576         struct seq_file *m = filp->private_data;
1577         struct trace_array *tr = m->private;
1578         struct trace_pid_list *filtered_pids = NULL;
1579         struct trace_pid_list *pid_list;
1580         struct trace_event_file *file;
1581         ssize_t ret;
1582
1583         if (!cnt)
1584                 return 0;
1585
1586         ret = tracing_update_buffers();
1587         if (ret < 0)
1588                 return ret;
1589
1590         mutex_lock(&event_mutex);
1591
1592         filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1593                                              lockdep_is_held(&event_mutex));
1594
1595         ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
1596         if (ret < 0)
1597                 goto out;
1598
1599         rcu_assign_pointer(tr->filtered_pids, pid_list);
1600
1601         list_for_each_entry(file, &tr->events, list) {
1602                 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1603         }
1604
1605         if (filtered_pids) {
1606                 tracepoint_synchronize_unregister();
1607                 trace_free_pid_list(filtered_pids);
1608         } else if (pid_list) {
1609                 /*
1610                  * Register a probe that is called before all other probes
1611                  * to set ignore_pid if next or prev do not match.
1612                  * Register a probe that is called after all other probes
1613                  * to only keep ignore_pid set if next pid matches.
1614                  */
1615                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1616                                                  tr, INT_MAX);
1617                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1618                                                  tr, 0);
1619
1620                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1621                                                  tr, INT_MAX);
1622                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1623                                                  tr, 0);
1624
1625                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1626                                                      tr, INT_MAX);
1627                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1628                                                      tr, 0);
1629
1630                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1631                                                  tr, INT_MAX);
1632                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1633                                                  tr, 0);
1634         }
1635
1636         /*
1637          * Ignoring of pids is applied at task switch, but we also have to
1638          * update the flag for tasks that are currently running.
1639          * Always do this in case a pid was appended or removed.
1640          */
1641         on_each_cpu(ignore_task_cpu, tr, 1);
1642
1643  out:
1644         mutex_unlock(&event_mutex);
1645
1646         if (ret > 0)
1647                 *ppos += ret;
1648
1649         return ret;
1650 }
1651
1652 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1653 static int ftrace_event_set_open(struct inode *inode, struct file *file);
1654 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
1655 static int ftrace_event_release(struct inode *inode, struct file *file);
1656
1657 static const struct seq_operations show_event_seq_ops = {
1658         .start = t_start,
1659         .next = t_next,
1660         .show = t_show,
1661         .stop = t_stop,
1662 };
1663
1664 static const struct seq_operations show_set_event_seq_ops = {
1665         .start = s_start,
1666         .next = s_next,
1667         .show = t_show,
1668         .stop = t_stop,
1669 };
1670
1671 static const struct seq_operations show_set_pid_seq_ops = {
1672         .start = p_start,
1673         .next = p_next,
1674         .show = trace_pid_show,
1675         .stop = p_stop,
1676 };
1677
1678 static const struct file_operations ftrace_avail_fops = {
1679         .open = ftrace_event_avail_open,
1680         .read = seq_read,
1681         .llseek = seq_lseek,
1682         .release = seq_release,
1683 };
1684
1685 static const struct file_operations ftrace_set_event_fops = {
1686         .open = ftrace_event_set_open,
1687         .read = seq_read,
1688         .write = ftrace_event_write,
1689         .llseek = seq_lseek,
1690         .release = ftrace_event_release,
1691 };
1692
1693 static const struct file_operations ftrace_set_event_pid_fops = {
1694         .open = ftrace_event_set_pid_open,
1695         .read = seq_read,
1696         .write = ftrace_event_pid_write,
1697         .llseek = seq_lseek,
1698         .release = ftrace_event_release,
1699 };
1700
1701 static const struct file_operations ftrace_enable_fops = {
1702         .open = tracing_open_generic,
1703         .read = event_enable_read,
1704         .write = event_enable_write,
1705         .llseek = default_llseek,
1706 };
1707
1708 static const struct file_operations ftrace_event_format_fops = {
1709         .open = trace_format_open,
1710         .read = seq_read,
1711         .llseek = seq_lseek,
1712         .release = seq_release,
1713 };
1714
1715 static const struct file_operations ftrace_event_id_fops = {
1716         .read = event_id_read,
1717         .llseek = default_llseek,
1718 };
1719
1720 static const struct file_operations ftrace_event_filter_fops = {
1721         .open = tracing_open_generic,
1722         .read = event_filter_read,
1723         .write = event_filter_write,
1724         .llseek = default_llseek,
1725 };
1726
1727 static const struct file_operations ftrace_subsystem_filter_fops = {
1728         .open = subsystem_open,
1729         .read = subsystem_filter_read,
1730         .write = subsystem_filter_write,
1731         .llseek = default_llseek,
1732         .release = subsystem_release,
1733 };
1734
1735 static const struct file_operations ftrace_system_enable_fops = {
1736         .open = subsystem_open,
1737         .read = system_enable_read,
1738         .write = system_enable_write,
1739         .llseek = default_llseek,
1740         .release = subsystem_release,
1741 };
1742
1743 static const struct file_operations ftrace_tr_enable_fops = {
1744         .open = system_tr_open,
1745         .read = system_enable_read,
1746         .write = system_enable_write,
1747         .llseek = default_llseek,
1748         .release = subsystem_release,
1749 };
1750
1751 static const struct file_operations ftrace_show_header_fops = {
1752         .open = tracing_open_generic,
1753         .read = show_header,
1754         .llseek = default_llseek,
1755 };
1756
1757 static int
1758 ftrace_event_open(struct inode *inode, struct file *file,
1759                   const struct seq_operations *seq_ops)
1760 {
1761         struct seq_file *m;
1762         int ret;
1763
1764         ret = security_locked_down(LOCKDOWN_TRACEFS);
1765         if (ret)
1766                 return ret;
1767
1768         ret = seq_open(file, seq_ops);
1769         if (ret < 0)
1770                 return ret;
1771         m = file->private_data;
1772         /* copy tr over to seq ops */
1773         m->private = inode->i_private;
1774
1775         return ret;
1776 }
1777
1778 static int ftrace_event_release(struct inode *inode, struct file *file)
1779 {
1780         struct trace_array *tr = inode->i_private;
1781
1782         trace_array_put(tr);
1783
1784         return seq_release(inode, file);
1785 }
1786
1787 static int
1788 ftrace_event_avail_open(struct inode *inode, struct file *file)
1789 {
1790         const struct seq_operations *seq_ops = &show_event_seq_ops;
1791
1792         /* Checks for tracefs lockdown */
1793         return ftrace_event_open(inode, file, seq_ops);
1794 }
1795
1796 static int
1797 ftrace_event_set_open(struct inode *inode, struct file *file)
1798 {
1799         const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1800         struct trace_array *tr = inode->i_private;
1801         int ret;
1802
1803         ret = tracing_check_open_get_tr(tr);
1804         if (ret)
1805                 return ret;
1806
1807         if ((file->f_mode & FMODE_WRITE) &&
1808             (file->f_flags & O_TRUNC))
1809                 ftrace_clear_events(tr);
1810
1811         ret = ftrace_event_open(inode, file, seq_ops);
1812         if (ret < 0)
1813                 trace_array_put(tr);
1814         return ret;
1815 }
1816
1817 static int
1818 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1819 {
1820         const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1821         struct trace_array *tr = inode->i_private;
1822         int ret;
1823
1824         ret = tracing_check_open_get_tr(tr);
1825         if (ret)
1826                 return ret;
1827
1828         if ((file->f_mode & FMODE_WRITE) &&
1829             (file->f_flags & O_TRUNC))
1830                 ftrace_clear_event_pids(tr);
1831
1832         ret = ftrace_event_open(inode, file, seq_ops);
1833         if (ret < 0)
1834                 trace_array_put(tr);
1835         return ret;
1836 }
1837
1838 static struct event_subsystem *
1839 create_new_subsystem(const char *name)
1840 {
1841         struct event_subsystem *system;
1842
1843         /* need to create new entry */
1844         system = kmalloc(sizeof(*system), GFP_KERNEL);
1845         if (!system)
1846                 return NULL;
1847
1848         system->ref_count = 1;
1849
1850         /* Only allocate if dynamic (kprobes and modules) */
1851         system->name = kstrdup_const(name, GFP_KERNEL);
1852         if (!system->name)
1853                 goto out_free;
1854
1855         system->filter = NULL;
1856
1857         system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
1858         if (!system->filter)
1859                 goto out_free;
1860
1861         list_add(&system->list, &event_subsystems);
1862
1863         return system;
1864
1865  out_free:
1866         kfree_const(system->name);
1867         kfree(system);
1868         return NULL;
1869 }
1870
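     /*
      * Find or create the events/<system>/ directory for this trace instance.
      * An existing trace_subsystem_dir is reused (bumping its event count);
      * otherwise the event_subsystem is looked up or created, and a new
      * directory with per-system "filter" and "enable" files is added under
      * @parent.
      */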
1871 static struct dentry *
1872 event_subsystem_dir(struct trace_array *tr, const char *name,
1873                     struct trace_event_file *file, struct dentry *parent)
1874 {
1875         struct trace_subsystem_dir *dir;
1876         struct event_subsystem *system;
1877         struct dentry *entry;
1878
1879         /* First see if we already created this dir */
1880         list_for_each_entry(dir, &tr->systems, list) {
1881                 system = dir->subsystem;
1882                 if (strcmp(system->name, name) == 0) {
1883                         dir->nr_events++;
1884                         file->system = dir;
1885                         return dir->entry;
1886                 }
1887         }
1888
1889         /* Now see if the system itself exists. */
1890         list_for_each_entry(system, &event_subsystems, list) {
1891                 if (strcmp(system->name, name) == 0)
1892                         break;
1893         }
1894         /* Reset system variable when not found */
1895         if (&system->list == &event_subsystems)
1896                 system = NULL;
1897
1898         dir = kmalloc(sizeof(*dir), GFP_KERNEL);
1899         if (!dir)
1900                 goto out_fail;
1901
1902         if (!system) {
1903                 system = create_new_subsystem(name);
1904                 if (!system)
1905                         goto out_free;
1906         } else
1907                 __get_system(system);
1908
1909         dir->entry = tracefs_create_dir(name, parent);
1910         if (!dir->entry) {
1911                 pr_warn("Failed to create system directory %s\n", name);
1912                 __put_system(system);
1913                 goto out_free;
1914         }
1915
1916         dir->tr = tr;
1917         dir->ref_count = 1;
1918         dir->nr_events = 1;
1919         dir->subsystem = system;
1920         file->system = dir;
1921
1922         entry = tracefs_create_file("filter", 0644, dir->entry, dir,
1923                                     &ftrace_subsystem_filter_fops);
1924         if (!entry) {
1925                 kfree(system->filter);
1926                 system->filter = NULL;
1927                 pr_warn("Could not create tracefs '%s/filter' entry\n", name);
1928         }
1929
1930         trace_create_file("enable", 0644, dir->entry, dir,
1931                           &ftrace_system_enable_fops);
1932
1933         list_add(&dir->list, &tr->systems);
1934
1935         return dir->entry;
1936
1937  out_free:
1938         kfree(dir);
1939  out_fail:
1940         /* Only print this message if we failed on a memory allocation */
1941         if (!dir || !system)
1942                 pr_warn("No memory to create event subsystem %s\n", name);
1943         return NULL;
1944 }
1945
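     /*
      * Create the tracefs directory for one event, events/<system>/<event>/,
      * and populate it with the per-event control files: "enable", "filter"
      * and "trigger" (for events that can be enabled), "id" (with perf
      * support), "hist" (with hist triggers) and "format".
      */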
1946 static int
1947 event_create_dir(struct dentry *parent, struct trace_event_file *file)
1948 {
1949         struct trace_event_call *call = file->event_call;
1950         struct trace_array *tr = file->tr;
1951         struct list_head *head;
1952         struct dentry *d_events;
1953         const char *name;
1954         int ret;
1955
1956         /*
1957          * If the trace point header did not define TRACE_SYSTEM
1958          * then the system would be called "TRACE_SYSTEM".
1959          */
1960         if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
1961                 d_events = event_subsystem_dir(tr, call->class->system, file, parent);
1962                 if (!d_events)
1963                         return -ENOMEM;
1964         } else
1965                 d_events = parent;
1966
1967         name = trace_event_name(call);
1968         file->dir = tracefs_create_dir(name, d_events);
1969         if (!file->dir) {
1970                 pr_warn("Could not create tracefs '%s' directory\n", name);
1971                 return -1;
1972         }
1973
1974         if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1975                 trace_create_file("enable", 0644, file->dir, file,
1976                                   &ftrace_enable_fops);
1977
1978 #ifdef CONFIG_PERF_EVENTS
1979         if (call->event.type && call->class->reg)
1980                 trace_create_file("id", 0444, file->dir,
1981                                   (void *)(long)call->event.type,
1982                                   &ftrace_event_id_fops);
1983 #endif
1984
1985         /*
1986          * Other events may have the same class. Only update
1987          * the fields if they are not already defined.
1988          */
1989         head = trace_get_fields(call);
1990         if (list_empty(head)) {
1991                 ret = call->class->define_fields(call);
1992                 if (ret < 0) {
1993                         pr_warn("Could not initialize trace point events/%s\n",
1994                                 name);
1995                         return -1;
1996                 }
1997         }
1998
1999         /*
2000          * Only event directories that can be enabled should have
2001          * triggers or filters.
2002          */
2003         if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2004                 trace_create_file("filter", 0644, file->dir, file,
2005                                   &ftrace_event_filter_fops);
2006
2007                 trace_create_file("trigger", 0644, file->dir, file,
2008                                   &event_trigger_fops);
2009         }
2010
2011 #ifdef CONFIG_HIST_TRIGGERS
2012         trace_create_file("hist", 0444, file->dir, file,
2013                           &event_hist_fops);
2014 #endif
2015         trace_create_file("format", 0444, file->dir, call,
2016                           &ftrace_event_format_fops);
2017
2018         return 0;
2019 }
2020
2021 static void remove_event_from_tracers(struct trace_event_call *call)
2022 {
2023         struct trace_event_file *file;
2024         struct trace_array *tr;
2025
2026         do_for_each_event_file_safe(tr, file) {
2027                 if (file->event_call != call)
2028                         continue;
2029
2030                 remove_event_file_dir(file);
2031                 /*
2032                  * The do_for_each_event_file_safe() is
2033                  * a double loop. After finding the call for this
2034                  * trace_array, we use break to jump to the next
2035                  * trace_array.
2036                  */
2037                 break;
2038         } while_for_each_event_file();
2039 }
2040
2041 static void event_remove(struct trace_event_call *call)
2042 {
2043         struct trace_array *tr;
2044         struct trace_event_file *file;
2045
2046         do_for_each_event_file(tr, file) {
2047                 if (file->event_call != call)
2048                         continue;
2049
2050                 if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2051                         tr->clear_trace = true;
2052
2053                 ftrace_event_enable_disable(file, 0);
2054                 /*
2055                  * The do_for_each_event_file() is
2056                  * a double loop. After finding the call for this
2057                  * trace_array, we use break to jump to the next
2058                  * trace_array.
2059                  */
2060                 break;
2061         } while_for_each_event_file();
2062
2063         if (call->event.funcs)
2064                 __unregister_trace_event(&call->event);
2065         remove_event_from_tracers(call);
2066         list_del(&call->list);
2067 }
2068
2069 static int event_init(struct trace_event_call *call)
2070 {
2071         int ret = 0;
2072         const char *name;
2073
2074         name = trace_event_name(call);
2075         if (WARN_ON(!name))
2076                 return -EINVAL;
2077
2078         if (call->class->raw_init) {
2079                 ret = call->class->raw_init(call);
2080                 if (ret < 0 && ret != -ENOSYS)
2081                         pr_warn("Could not initialize trace events/%s\n", name);
2082         }
2083
2084         return ret;
2085 }
2086
2087 static int
2088 __register_event(struct trace_event_call *call, struct module *mod)
2089 {
2090         int ret;
2091
2092         ret = event_init(call);
2093         if (ret < 0)
2094                 return ret;
2095
2096         list_add(&call->list, &ftrace_events);
2097         call->mod = mod;
2098
2099         return 0;
2100 }
2101
2102 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2103 {
2104         int rlen;
2105         int elen;
2106
2107         /* Find the length of the eval value as a string */
2108         elen = snprintf(ptr, 0, "%ld", map->eval_value);
2109         /* Make sure there's enough room to replace the string with the value */
2110         if (len < elen)
2111                 return NULL;
2112
2113         snprintf(ptr, elen + 1, "%ld", map->eval_value);
2114
2115         /* Get the length of the rest of the string after the eval name */
2116         rlen = strlen(ptr + len);
2117         memmove(ptr + elen, ptr + len, rlen);
2118         /* Make sure we end the new string */
2119         ptr[elen + rlen] = 0;
2120
2121         return ptr + elen;
2122 }
2123
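     /*
      * Rewrite the event's print_fmt in place, replacing each occurrence of
      * the eval (enum/sizeof) name with its numeric value so that userspace
      * can parse the format. Text inside string literals and numbers are
      * skipped. Illustrative example: with an eval map of MY_FLAG -> 4, the
      * fragment { MY_FLAG, "my_flag" } becomes { 4, "my_flag" }.
      */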
2124 static void update_event_printk(struct trace_event_call *call,
2125                                 struct trace_eval_map *map)
2126 {
2127         char *ptr;
2128         int quote = 0;
2129         int len = strlen(map->eval_string);
2130
2131         for (ptr = call->print_fmt; *ptr; ptr++) {
2132                 if (*ptr == '\\') {
2133                         ptr++;
2134                         /* paranoid */
2135                         if (!*ptr)
2136                                 break;
2137                         continue;
2138                 }
2139                 if (*ptr == '"') {
2140                         quote ^= 1;
2141                         continue;
2142                 }
2143                 if (quote)
2144                         continue;
2145                 if (isdigit(*ptr)) {
2146                         /* skip numbers */
2147                         do {
2148                                 ptr++;
2149                                 /* Check for alpha chars like ULL */
2150                         } while (isalnum(*ptr));
2151                         if (!*ptr)
2152                                 break;
2153                         /*
2154                          * A number must have some kind of delimiter after
2155                          * it, and we can ignore that too.
2156                          */
2157                         continue;
2158                 }
2159                 if (isalpha(*ptr) || *ptr == '_') {
2160                         if (strncmp(map->eval_string, ptr, len) == 0 &&
2161                             !isalnum(ptr[len]) && ptr[len] != '_') {
2162                                 ptr = eval_replace(ptr, map, len);
2163                                 /* enum/sizeof string smaller than value */
2164                                 if (WARN_ON_ONCE(!ptr))
2165                                         return;
2166                                 /*
2167                                  * No need to decrement here, as eval_replace()
2168                  * returns the pointer to the character past
2169                  * the eval, and two evals cannot be placed
2170                                  * back to back without something in between.
2171                                  * We can skip that something in between.
2172                                  */
2173                                 continue;
2174                         }
2175                 skip_more:
2176                         do {
2177                                 ptr++;
2178                         } while (isalnum(*ptr) || *ptr == '_');
2179                         if (!*ptr)
2180                                 break;
2181                         /*
2182                          * If what comes after this variable is a '.' or
2183                          * '->' then we can continue to ignore that string.
2184                          */
2185                         if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2186                                 ptr += *ptr == '.' ? 1 : 2;
2187                                 if (!*ptr)
2188                                         break;
2189                                 goto skip_more;
2190                         }
2191                         /*
2192                          * Once again, we can skip the delimiter that came
2193                          * after the string.
2194                          */
2195                         continue;
2196                 }
2197         }
2198 }
2199
2200 void trace_event_eval_update(struct trace_eval_map **map, int len)
2201 {
2202         struct trace_event_call *call, *p;
2203         const char *last_system = NULL;
2204         bool first = false;
2205         int last_i;
2206         int i;
2207
2208         down_write(&trace_event_sem);
2209         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2210                 /* events are usually grouped together with systems */
2211                 if (!last_system || call->class->system != last_system) {
2212                         first = true;
2213                         last_i = 0;
2214                         last_system = call->class->system;
2215                 }
2216
2217                 /*
2218                  * Since calls are grouped by systems, the likelihood that the
2219                  * next call in the iteration belongs to the same system as the
2220                  * previous call is high. As an optimization, we skip searching
2221                  * for a map[] that matches the call's system if the last call
2222                  * was from the same system. That's what last_i is for. If the
2223                  * call has the same system as the previous call, then last_i
2224                  * will be the index of the first map[] that has a matching
2225                  * system.
2226                  */
2227                 for (i = last_i; i < len; i++) {
2228                         if (call->class->system == map[i]->system) {
2229                                 /* Save the first system if need be */
2230                                 if (first) {
2231                                         last_i = i;
2232                                         first = false;
2233                                 }
2234                                 update_event_printk(call, map[i]);
2235                         }
2236                 }
2237         }
2238         up_write(&trace_event_sem);
2239 }
2240
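     /*
      * Allocate a trace_event_file binding @call to the trace instance @tr
      * and add it to tr->events. If the instance already has a set_event_pid
      * list, the new file inherits the PID_FILTER flag so the pid filter
      * applies to it as well.
      */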
2241 static struct trace_event_file *
2242 trace_create_new_event(struct trace_event_call *call,
2243                        struct trace_array *tr)
2244 {
2245         struct trace_pid_list *pid_list;
2246         struct trace_event_file *file;
2247
2248         file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2249         if (!file)
2250                 return NULL;
2251
2252         pid_list = rcu_dereference_protected(tr->filtered_pids,
2253                                              lockdep_is_held(&event_mutex));
2254
2255         if (pid_list)
2256                 file->flags |= EVENT_FILE_FL_PID_FILTER;
2257
2258         file->event_call = call;
2259         file->tr = tr;
2260         atomic_set(&file->sm_ref, 0);
2261         atomic_set(&file->tm_ref, 0);
2262         INIT_LIST_HEAD(&file->triggers);
2263         list_add(&file->list, &tr->events);
2264
2265         return file;
2266 }
2267
2268 /* Add an event to a trace directory */
2269 static int
2270 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2271 {
2272         struct trace_event_file *file;
2273
2274         file = trace_create_new_event(call, tr);
2275         if (!file)
2276                 return -ENOMEM;
2277
2278         return event_create_dir(tr->event_dir, file);
2279 }
2280
2281 /*
2282  * Just create a descriptor for early init. A descriptor is required
2283  * for enabling events at boot. We want to enable events before
2284  * the filesystem is initialized.
2285  */
2286 static __init int
2287 __trace_early_add_new_event(struct trace_event_call *call,
2288                             struct trace_array *tr)
2289 {
2290         struct trace_event_file *file;
2291
2292         file = trace_create_new_event(call, tr);
2293         if (!file)
2294                 return -ENOMEM;
2295
2296         return 0;
2297 }
2298
2299 struct ftrace_module_file_ops;
2300 static void __add_event_to_tracers(struct trace_event_call *call);
2301
2302 /* Add an additional event_call dynamically */
2303 int trace_add_event_call(struct trace_event_call *call)
2304 {
2305         int ret;
2306         lockdep_assert_held(&event_mutex);
2307
2308         mutex_lock(&trace_types_lock);
2309
2310         ret = __register_event(call, NULL);
2311         if (ret >= 0)
2312                 __add_event_to_tracers(call);
2313
2314         mutex_unlock(&trace_types_lock);
2315         return ret;
2316 }
2317
2318 /*
2319  * Must be called under locking of trace_types_lock, event_mutex and
2320  * trace_event_sem.
2321  */
2322 static void __trace_remove_event_call(struct trace_event_call *call)
2323 {
2324         event_remove(call);
2325         trace_destroy_fields(call);
2326         free_event_filter(call->filter);
2327         call->filter = NULL;
2328 }
2329
2330 static int probe_remove_event_call(struct trace_event_call *call)
2331 {
2332         struct trace_array *tr;
2333         struct trace_event_file *file;
2334
2335 #ifdef CONFIG_PERF_EVENTS
2336         if (call->perf_refcount)
2337                 return -EBUSY;
2338 #endif
2339         do_for_each_event_file(tr, file) {
2340                 if (file->event_call != call)
2341                         continue;
2342                 /*
2343                  * We can't rely on the ftrace_event_enable_disable(enable => 0)
2344                  * that we are going to do; EVENT_FILE_FL_SOFT_MODE can suppress
2345                  * TRACE_REG_UNREGISTER.
2346                  */
2347                 if (file->flags & EVENT_FILE_FL_ENABLED)
2348                         goto busy;
2349
2350                 if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2351                         tr->clear_trace = true;
2352                 /*
2353                  * The do_for_each_event_file() is
2354                  * a double loop. After finding the call for this
2355                  * trace_array, we use break to jump to the next
2356                  * trace_array.
2357                  */
2358                 break;
2359         } while_for_each_event_file();
2360
2361         __trace_remove_event_call(call);
2362
2363         return 0;
2364  busy:
2365         /* No need to clear the trace now */
2366         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2367                 tr->clear_trace = false;
2368         }
2369         return -EBUSY;
2370 }
2371
2372 /* Remove an event_call */
2373 int trace_remove_event_call(struct trace_event_call *call)
2374 {
2375         int ret;
2376
2377         lockdep_assert_held(&event_mutex);
2378
2379         mutex_lock(&trace_types_lock);
2380         down_write(&trace_event_sem);
2381         ret = probe_remove_event_call(call);
2382         up_write(&trace_event_sem);
2383         mutex_unlock(&trace_types_lock);
2384
2385         return ret;
2386 }
2387
2388 #define for_each_event(event, start, end)                       \
2389         for (event = start;                                     \
2390              (unsigned long)event < (unsigned long)end;         \
2391              event++)
2392
2393 #ifdef CONFIG_MODULES
2394
2395 static void trace_module_add_events(struct module *mod)
2396 {
2397         struct trace_event_call **call, **start, **end;
2398
2399         if (!mod->num_trace_events)
2400                 return;
2401
2402         /* Don't create events for modules with a bad taint; their tracepoints can't be trusted */
2403         if (trace_module_has_bad_taint(mod)) {
2404                 pr_err("%s: module has bad taint, not creating trace events\n",
2405                        mod->name);
2406                 return;
2407         }
2408
2409         start = mod->trace_events;
2410         end = mod->trace_events + mod->num_trace_events;
2411
2412         for_each_event(call, start, end) {
2413                 __register_event(*call, mod);
2414                 __add_event_to_tracers(*call);
2415         }
2416 }
2417
2418 static void trace_module_remove_events(struct module *mod)
2419 {
2420         struct trace_event_call *call, *p;
2421
2422         down_write(&trace_event_sem);
2423         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2424                 if (call->mod == mod)
2425                         __trace_remove_event_call(call);
2426         }
2427         up_write(&trace_event_sem);
2428
2429         /*
2430          * It is safest to reset the ring buffer if the module being unloaded
2431          * registered any events that were used. The only worry is if
2432          * a new module gets loaded, and takes on the same id as the events
2433          * of this module. When printing out the buffer, traced events left
2434          * over from this module may be decoded with the new module's event
2435          * definitions and unexpected results may occur.
2436          */
2437         tracing_reset_all_online_cpus_unlocked();
2438 }
2439
2440 static int trace_module_notify(struct notifier_block *self,
2441                                unsigned long val, void *data)
2442 {
2443         struct module *mod = data;
2444
2445         mutex_lock(&event_mutex);
2446         mutex_lock(&trace_types_lock);
2447         switch (val) {
2448         case MODULE_STATE_COMING:
2449                 trace_module_add_events(mod);
2450                 break;
2451         case MODULE_STATE_GOING:
2452                 trace_module_remove_events(mod);
2453                 break;
2454         }
2455         mutex_unlock(&trace_types_lock);
2456         mutex_unlock(&event_mutex);
2457
2458         return 0;
2459 }
2460
2461 static struct notifier_block trace_module_nb = {
2462         .notifier_call = trace_module_notify,
2463         .priority = 1, /* higher than trace.c module notify */
2464 };
2465 #endif /* CONFIG_MODULES */
2466
2467 /* Create a new event directory structure for a trace directory. */
2468 static void
2469 __trace_add_event_dirs(struct trace_array *tr)
2470 {
2471         struct trace_event_call *call;
2472         int ret;
2473
2474         list_for_each_entry(call, &ftrace_events, list) {
2475                 ret = __trace_add_new_event(call, tr);
2476                 if (ret < 0)
2477                         pr_warn("Could not create directory for event %s\n",
2478                                 trace_event_name(call));
2479         }
2480 }
2481
2482 /* Returns any file that matches the system and event */
2483 struct trace_event_file *
2484 __find_event_file(struct trace_array *tr, const char *system, const char *event)
2485 {
2486         struct trace_event_file *file;
2487         struct trace_event_call *call;
2488         const char *name;
2489
2490         list_for_each_entry(file, &tr->events, list) {
2491
2492                 call = file->event_call;
2493                 name = trace_event_name(call);
2494
2495                 if (!name || !call->class)
2496                         continue;
2497
2498                 if (strcmp(event, name) == 0 &&
2499                     strcmp(system, call->class->system) == 0)
2500                         return file;
2501         }
2502         return NULL;
2503 }
2504
2505 /* Returns valid trace event files that match system and event */
2506 struct trace_event_file *
2507 find_event_file(struct trace_array *tr, const char *system, const char *event)
2508 {
2509         struct trace_event_file *file;
2510
2511         file = __find_event_file(tr, system, event);
2512         if (!file || !file->event_call->class->reg ||
2513             file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2514                 return NULL;
2515
2516         return file;
2517 }
2518
2519 #ifdef CONFIG_DYNAMIC_FTRACE
2520
2521 /* Avoid typos */
2522 #define ENABLE_EVENT_STR        "enable_event"
2523 #define DISABLE_EVENT_STR       "disable_event"
2524
2525 struct event_probe_data {
2526         struct trace_event_file *file;
2527         unsigned long                   count;
2528         int                             ref;
2529         bool                            enable;
2530 };
2531
2532 static void update_event_probe(struct event_probe_data *data)
2533 {
2534         if (data->enable)
2535                 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2536         else
2537                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2538 }
2539
2540 static void
2541 event_enable_probe(unsigned long ip, unsigned long parent_ip,
2542                    struct trace_array *tr, struct ftrace_probe_ops *ops,
2543                    void *data)
2544 {
2545         struct ftrace_func_mapper *mapper = data;
2546         struct event_probe_data *edata;
2547         void **pdata;
2548
2549         pdata = ftrace_func_mapper_find_ip(mapper, ip);
2550         if (!pdata || !*pdata)
2551                 return;
2552
2553         edata = *pdata;
2554         update_event_probe(edata);
2555 }
2556
2557 static void
2558 event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
2559                          struct trace_array *tr, struct ftrace_probe_ops *ops,
2560                          void *data)
2561 {
2562         struct ftrace_func_mapper *mapper = data;
2563         struct event_probe_data *edata;
2564         void **pdata;
2565
2566         pdata = ftrace_func_mapper_find_ip(mapper, ip);
2567         if (!pdata || !*pdata)
2568                 return;
2569
2570         edata = *pdata;
2571
2572         if (!edata->count)
2573                 return;
2574
2575         /* Skip if the event is in a state we want to switch to */
2576         if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2577                 return;
2578
2579         if (edata->count != -1)
2580                 (edata->count)--;
2581
2582         update_event_probe(edata);
2583 }
2584
2585 static int
2586 event_enable_print(struct seq_file *m, unsigned long ip,
2587                    struct ftrace_probe_ops *ops, void *data)
2588 {
2589         struct ftrace_func_mapper *mapper = data;
2590         struct event_probe_data *edata;
2591         void **pdata;
2592
2593         pdata = ftrace_func_mapper_find_ip(mapper, ip);
2594
2595         if (WARN_ON_ONCE(!pdata || !*pdata))
2596                 return 0;
2597
2598         edata = *pdata;
2599
2600         seq_printf(m, "%ps:", (void *)ip);
2601
2602         seq_printf(m, "%s:%s:%s",
2603                    edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2604                    edata->file->event_call->class->system,
2605                    trace_event_name(edata->file->event_call));
2606
2607         if (edata->count == -1)
2608                 seq_puts(m, ":unlimited\n");
2609         else
2610                 seq_printf(m, ":count=%ld\n", edata->count);
2611
2612         return 0;
2613 }
2614
2615 static int
2616 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
2617                   unsigned long ip, void *init_data, void **data)
2618 {
2619         struct ftrace_func_mapper *mapper = *data;
2620         struct event_probe_data *edata = init_data;
2621         int ret;
2622
2623         if (!mapper) {
2624                 mapper = allocate_ftrace_func_mapper();
2625                 if (!mapper)
2626                         return -ENODEV;
2627                 *data = mapper;
2628         }
2629
2630         ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
2631         if (ret < 0)
2632                 return ret;
2633
2634         edata->ref++;
2635
2636         return 0;
2637 }
2638
2639 static int free_probe_data(void *data)
2640 {
2641         struct event_probe_data *edata = data;
2642
2643         edata->ref--;
2644         if (!edata->ref) {
2645                 /* Remove the SOFT_MODE flag */
2646                 __ftrace_event_enable_disable(edata->file, 0, 1);
2647                 module_put(edata->file->event_call->mod);
2648                 kfree(edata);
2649         }
2650         return 0;
2651 }
2652
2653 static void
2654 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
2655                   unsigned long ip, void *data)
2656 {
2657         struct ftrace_func_mapper *mapper = data;
2658         struct event_probe_data *edata;
2659
2660         if (!ip) {
2661                 if (!mapper)
2662                         return;
2663                 free_ftrace_func_mapper(mapper, free_probe_data);
2664                 return;
2665         }
2666
2667         edata = ftrace_func_mapper_remove_ip(mapper, ip);
2668
2669         if (WARN_ON_ONCE(!edata))
2670                 return;
2671
2672         if (WARN_ON_ONCE(edata->ref <= 0))
2673                 return;
2674
2675         free_probe_data(edata);
2676 }
2677
2678 static struct ftrace_probe_ops event_enable_probe_ops = {
2679         .func                   = event_enable_probe,
2680         .print                  = event_enable_print,
2681         .init                   = event_enable_init,
2682         .free                   = event_enable_free,
2683 };
2684
2685 static struct ftrace_probe_ops event_enable_count_probe_ops = {
2686         .func                   = event_enable_count_probe,
2687         .print                  = event_enable_print,
2688         .init                   = event_enable_init,
2689         .free                   = event_enable_free,
2690 };
2691
2692 static struct ftrace_probe_ops event_disable_probe_ops = {
2693         .func                   = event_enable_probe,
2694         .print                  = event_enable_print,
2695         .init                   = event_enable_init,
2696         .free                   = event_enable_free,
2697 };
2698
2699 static struct ftrace_probe_ops event_disable_count_probe_ops = {
2700         .func                   = event_enable_count_probe,
2701         .print                  = event_enable_print,
2702         .init                   = event_enable_init,
2703         .free                   = event_enable_free,
2704 };
2705
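     /*
      * Implements the "enable_event" and "disable_event" commands of
      * set_ftrace_filter. The parameter format is <system>:<event>[:count],
      * so an illustrative command would be:
      *
      *   echo 'schedule:enable_event:sched:sched_switch:2' > set_ftrace_filter
      *
      * which soft-enables sched_switch when schedule() is traced; the
      * optional count limits how many times the probe toggles the event.
      * Prefixing the function glob with '!' removes the probe again.
      */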
2706 static int
2707 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
2708                   char *glob, char *cmd, char *param, int enabled)
2709 {
2710         struct trace_event_file *file;
2711         struct ftrace_probe_ops *ops;
2712         struct event_probe_data *data;
2713         const char *system;
2714         const char *event;
2715         char *number;
2716         bool enable;
2717         int ret;
2718
2719         if (!tr)
2720                 return -ENODEV;
2721
2722         /* hash funcs only work with set_ftrace_filter */
2723         if (!enabled || !param)
2724                 return -EINVAL;
2725
2726         system = strsep(&param, ":");
2727         if (!param)
2728                 return -EINVAL;
2729
2730         event = strsep(&param, ":");
2731
2732         mutex_lock(&event_mutex);
2733
2734         ret = -EINVAL;
2735         file = find_event_file(tr, system, event);
2736         if (!file)
2737                 goto out;
2738
2739         enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2740
2741         if (enable)
2742                 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2743         else
2744                 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2745
2746         if (glob[0] == '!') {
2747                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
2748                 goto out;
2749         }
2750
2751         ret = -ENOMEM;
2752
2753         data = kzalloc(sizeof(*data), GFP_KERNEL);
2754         if (!data)
2755                 goto out;
2756
2757         data->enable = enable;
2758         data->count = -1;
2759         data->file = file;
2760
2761         if (!param)
2762                 goto out_reg;
2763
2764         number = strsep(&param, ":");
2765
2766         ret = -EINVAL;
2767         if (!strlen(number))
2768                 goto out_free;
2769
2770         /*
2771          * The optional number is how many times the probe
2772          * will toggle the event before it stops.
2773          */
2774         ret = kstrtoul(number, 0, &data->count);
2775         if (ret)
2776                 goto out_free;
2777
2778  out_reg:
2779         /* Don't let event modules unload while probe registered */
2780         ret = try_module_get(file->event_call->mod);
2781         if (!ret) {
2782                 ret = -EBUSY;
2783                 goto out_free;
2784         }
2785
2786         ret = __ftrace_event_enable_disable(file, 1, 1);
2787         if (ret < 0)
2788                 goto out_put;
2789
2790         ret = register_ftrace_function_probe(glob, tr, ops, data);
2791         /*
2792          * The above returns on success the # of functions enabled,
2793          * but if it didn't find any functions it returns zero.
2794          * Consider no functions a failure too.
2795          */
2796         if (!ret) {
2797                 ret = -ENOENT;
2798                 goto out_disable;
2799         } else if (ret < 0)
2800                 goto out_disable;
2801         /* Just return zero, not the number of enabled functions */
2802         ret = 0;
2803  out:
2804         mutex_unlock(&event_mutex);
2805         return ret;
2806
2807  out_disable:
2808         __ftrace_event_enable_disable(file, 0, 1);
2809  out_put:
2810         module_put(file->event_call->mod);
2811  out_free:
2812         kfree(data);
2813         goto out;
2814 }
2815
2816 static struct ftrace_func_command event_enable_cmd = {
2817         .name                   = ENABLE_EVENT_STR,
2818         .func                   = event_enable_func,
2819 };
2820
2821 static struct ftrace_func_command event_disable_cmd = {
2822         .name                   = DISABLE_EVENT_STR,
2823         .func                   = event_enable_func,
2824 };
2825
2826 static __init int register_event_cmds(void)
2827 {
2828         int ret;
2829
2830         ret = register_ftrace_command(&event_enable_cmd);
2831         if (WARN_ON(ret < 0))
2832                 return ret;
2833         ret = register_ftrace_command(&event_disable_cmd);
2834         if (WARN_ON(ret < 0))
2835                 unregister_ftrace_command(&event_enable_cmd);
2836         return ret;
2837 }
2838 #else
2839 static inline int register_event_cmds(void) { return 0; }
2840 #endif /* CONFIG_DYNAMIC_FTRACE */
2841
2842 /*
2843  * The top level array has already had its trace_event_file
2844  * descriptors created in order to allow for early events to
2845  * be recorded. This function is called after tracefs has been
2846  * initialized, and we now have to create the files associated
2847  * with the events.
2848  */
2849 static __init void
2850 __trace_early_add_event_dirs(struct trace_array *tr)
2851 {
2852         struct trace_event_file *file;
2853         int ret;
2854
2855
2856         list_for_each_entry(file, &tr->events, list) {
2857                 ret = event_create_dir(tr->event_dir, file);
2858                 if (ret < 0)
2859                         pr_warn("Could not create directory for event %s\n",
2860                                 trace_event_name(file->event_call));
2861         }
2862 }
2863
2864 /*
2865  * For early boot up, the top trace array needs to have
2866  * a list of events that can be enabled. This must be done before
2867  * the filesystem is set up in order to allow events to be traced
2868  * early.
2869  */
2870 static __init void
2871 __trace_early_add_events(struct trace_array *tr)
2872 {
2873         struct trace_event_call *call;
2874         int ret;
2875
2876         list_for_each_entry(call, &ftrace_events, list) {
2877                 /* Early boot up should not have any modules loaded */
2878                 if (WARN_ON_ONCE(call->mod))
2879                         continue;
2880
2881                 ret = __trace_early_add_new_event(call, tr);
2882                 if (ret < 0)
2883                         pr_warn("Could not create early event %s\n",
2884                                 trace_event_name(call));
2885         }
2886 }
2887
2888 /* Remove the event directory structure for a trace directory. */
2889 static void
2890 __trace_remove_event_dirs(struct trace_array *tr)
2891 {
2892         struct trace_event_file *file, *next;
2893
2894         list_for_each_entry_safe(file, next, &tr->events, list)
2895                 remove_event_file_dir(file);
2896 }
2897
2898 static void __add_event_to_tracers(struct trace_event_call *call)
2899 {
2900         struct trace_array *tr;
2901
2902         list_for_each_entry(tr, &ftrace_trace_arrays, list)
2903                 __trace_add_new_event(call, tr);
2904 }
2905
2906 extern struct trace_event_call *__start_ftrace_events[];
2907 extern struct trace_event_call *__stop_ftrace_events[];
2908
2909 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2910
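     /*
      * Parse the "trace_event=" kernel command line option: a comma-separated
      * list of events to enable during early boot, e.g.
      *
      *   trace_event=sched:sched_switch,irq:irq_handler_entry
      */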
2911 static __init int setup_trace_event(char *str)
2912 {
2913         strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2914         ring_buffer_expanded = true;
2915         tracing_selftest_disabled = true;
2916
2917         return 1;
2918 }
2919 __setup("trace_event=", setup_trace_event);
2920
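     /*
      * Create the top level event files of a trace instance: "set_event",
      * "set_event_pid", the "events" directory with its global "enable"
      * file, and the ring buffer "header_page"/"header_event" format files.
      */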
2921 /* Expects to have event_mutex held when called */
2922 static int
2923 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2924 {
2925         struct dentry *d_events;
2926         struct dentry *entry;
2927
2928         entry = tracefs_create_file("set_event", 0644, parent,
2929                                     tr, &ftrace_set_event_fops);
2930         if (!entry) {
2931                 pr_warn("Could not create tracefs 'set_event' entry\n");
2932                 return -ENOMEM;
2933         }
2934
2935         d_events = tracefs_create_dir("events", parent);
2936         if (!d_events) {
2937                 pr_warn("Could not create tracefs 'events' directory\n");
2938                 return -ENOMEM;
2939         }
2940
2941         entry = trace_create_file("enable", 0644, d_events,
2942                                   tr, &ftrace_tr_enable_fops);
2943         if (!entry) {
2944                 pr_warn("Could not create tracefs 'enable' entry\n");
2945                 return -ENOMEM;
2946         }
2947
2948         /* These are not as crucial, just warn if they are not created */
2949
2950         entry = tracefs_create_file("set_event_pid", 0644, parent,
2951                                     tr, &ftrace_set_event_pid_fops);
2952         if (!entry)
2953                 pr_warn("Could not create tracefs 'set_event_pid' entry\n");
2954
2955         /* ring buffer internal formats */
2956         entry = trace_create_file("header_page", 0444, d_events,
2957                                   ring_buffer_print_page_header,
2958                                   &ftrace_show_header_fops);
2959         if (!entry)
2960                 pr_warn("Could not create tracefs 'header_page' entry\n");
2961
2962         entry = trace_create_file("header_event", 0444, d_events,
2963                                   ring_buffer_print_entry_header,
2964                                   &ftrace_show_header_fops);
2965         if (!entry)
2966                 pr_warn("Could not create tracefs 'header_event' entry\n");
2967
2968         tr->event_dir = d_events;
2969
2970         return 0;
2971 }
2972
2973 /**
2974  * event_trace_add_tracer - add an instance of a trace_array to events
2975  * @parent: The parent dentry to place the files/directories for events in
2976  * @tr: The trace array associated with these events
2977  *
2978  * When a new instance is created, it needs to set up its events
2979  * directory, as well as other files associated with events. It also
2980  * creates the event hierarchy in the @parent/events directory.
2981  *
2982  * Returns 0 on success.
2983  *
2984  * Must be called with event_mutex held.
2985  */
2986 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
2987 {
2988         int ret;
2989
2990         lockdep_assert_held(&event_mutex);
2991
2992         ret = create_event_toplevel_files(parent, tr);
2993         if (ret)
2994                 goto out;
2995
2996         down_write(&trace_event_sem);
2997         __trace_add_event_dirs(tr);
2998         up_write(&trace_event_sem);
2999
3000  out:
3001         return ret;
3002 }
3003
3004 /*
3005  * The top trace array already had its trace_event_file descriptors
3006  * created. Now the tracefs files themselves need to be created.
3007  */
3008 static __init int
3009 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
3010 {
3011         int ret;
3012
3013         mutex_lock(&event_mutex);
3014
3015         ret = create_event_toplevel_files(parent, tr);
3016         if (ret)
3017                 goto out_unlock;
3018
3019         down_write(&trace_event_sem);
3020         __trace_early_add_event_dirs(tr);
3021         up_write(&trace_event_sem);
3022
3023  out_unlock:
3024         mutex_unlock(&event_mutex);
3025
3026         return ret;
3027 }
3028
3029 /* Must be called with event_mutex held */
3030 int event_trace_del_tracer(struct trace_array *tr)
3031 {
3032         lockdep_assert_held(&event_mutex);
3033
3034         /* Disable any event triggers and associated soft-disabled events */
3035         clear_event_triggers(tr);
3036
3037         /* Clear the pid list */
3038         __ftrace_clear_event_pids(tr);
3039
3040         /* Disable any running events */
3041         __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3042
3043         /* Make sure no more events are being executed */
3044         tracepoint_synchronize_unregister();
3045
3046         down_write(&trace_event_sem);
3047         __trace_remove_event_dirs(tr);
3048         tracefs_remove_recursive(tr->event_dir);
3049         up_write(&trace_event_sem);
3050
3051         tr->event_dir = NULL;
3052
3053         return 0;
3054 }
3055
3056 static __init int event_trace_memsetup(void)
3057 {
3058         field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3059         file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3060         return 0;
3061 }
3062
3063 static __init void
3064 early_enable_events(struct trace_array *tr, bool disable_first)
3065 {
3066         char *buf = bootup_event_buf;
3067         char *token;
3068         int ret;
3069
3070         while (true) {
3071                 token = strsep(&buf, ",");
3072
3073                 if (!token)
3074                         break;
3075
3076                 if (*token) {
3077                         /* Restarting syscalls requires that we stop them first */
3078                         if (disable_first)
3079                                 ftrace_set_clr_event(tr, token, 0);
3080
3081                         ret = ftrace_set_clr_event(tr, token, 1);
3082                         if (ret)
3083                                 pr_warn("Failed to enable trace event: %s\n", token);
3084                 }
3085
3086                 /* Put back the comma to allow this to be called again */
3087                 if (buf)
3088                         *(buf - 1) = ',';
3089         }
3090 }
3091
3092 static __init int event_trace_enable(void)
3093 {
3094         struct trace_array *tr = top_trace_array();
3095         struct trace_event_call **iter, *call;
3096         int ret;
3097
3098         if (!tr)
3099                 return -ENODEV;
3100
3101         for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3102
3103                 call = *iter;
3104                 ret = event_init(call);
3105                 if (!ret)
3106                         list_add(&call->list, &ftrace_events);
3107         }
3108
3109         /*
3110          * We need the top trace array to have a working set of trace
3111          * points at early init, before the debug files and directories
3112          * are created. Create the file entries now, and attach them
3113          * to the actual file dentries later.
3114          */
3115         __trace_early_add_events(tr);
3116
3117         early_enable_events(tr, false);
3118
3119         trace_printk_start_comm();
3120
3121         register_event_cmds();
3122
3123         register_trigger_cmds();
3124
3125         return 0;
3126 }
3127
3128 /*
3129  * event_trace_enable() is called from trace_event_init() first to
3130  * initialize events and perhaps start any events that are on the
3131  * command line. Unfortunately, there are some events that will not
3132  * start this early, like the system call tracepoints that need
3133  * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3134  * is called before pid 1 starts, and this flag is never set, making
3135  * the syscall tracepoint never get reached, but the event is enabled
3136  * regardless (and not doing anything). event_trace_enable_again() below re-runs the enabling from an early_initcall, after pid 1 has been created.
3137  */
3138 static __init int event_trace_enable_again(void)
3139 {
3140         struct trace_array *tr;
3141
3142         tr = top_trace_array();
3143         if (!tr)
3144                 return -ENODEV;
3145
3146         early_enable_events(tr, true);
3147
3148         return 0;
3149 }
3150
3151 early_initcall(event_trace_enable_again);
3152
3153 __init int event_trace_init(void)
3154 {
3155         struct trace_array *tr;
3156         struct dentry *d_tracer;
3157         struct dentry *entry;
3158         int ret;
3159
3160         tr = top_trace_array();
3161         if (!tr)
3162                 return -ENODEV;
3163
3164         d_tracer = tracing_init_dentry();
3165         if (IS_ERR(d_tracer))
3166                 return 0;
3167
3168         entry = tracefs_create_file("available_events", 0444, d_tracer,
3169                                     tr, &ftrace_avail_fops);
3170         if (!entry)
3171                 pr_warn("Could not create tracefs 'available_events' entry\n");
3172
3173         if (trace_define_generic_fields())
3174                 pr_warn("tracing: Failed to allocate generic fields");
3175
3176         if (trace_define_common_fields())
3177                 pr_warn("tracing: Failed to allocate common fields");
3178
3179         ret = early_event_add_tracer(d_tracer, tr);
3180         if (ret)
3181                 return ret;
3182
3183 #ifdef CONFIG_MODULES
3184         ret = register_module_notifier(&trace_module_nb);
3185         if (ret)
3186                 pr_warn("Failed to register trace events module notifier\n");
3187 #endif
3188         return 0;
3189 }
3190
3191 void __init trace_event_init(void)
3192 {
3193         event_trace_memsetup();
3194         init_ftrace_syscalls();
3195         event_trace_enable();
3196 }
3197
3198 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
3199
3200 static DEFINE_SPINLOCK(test_spinlock);
3201 static DEFINE_SPINLOCK(test_spinlock_irq);
3202 static DEFINE_MUTEX(test_mutex);
3203
3204 static __init void test_work(struct work_struct *dummy)
3205 {
3206         spin_lock(&test_spinlock);
3207         spin_lock_irq(&test_spinlock_irq);
3208         udelay(1);
3209         spin_unlock_irq(&test_spinlock_irq);
3210         spin_unlock(&test_spinlock);
3211
3212         mutex_lock(&test_mutex);
3213         msleep(1);
3214         mutex_unlock(&test_mutex);
3215 }
3216
3217 static __init int event_test_thread(void *unused)
3218 {
3219         void *test_malloc;
3220
3221         test_malloc = kmalloc(1234, GFP_KERNEL);
3222         if (!test_malloc)
3223                 pr_info("failed to kmalloc\n");
3224
3225         schedule_on_each_cpu(test_work);
3226
3227         kfree(test_malloc);
3228
3229         set_current_state(TASK_INTERRUPTIBLE);
3230         while (!kthread_should_stop()) {
3231                 schedule();
3232                 set_current_state(TASK_INTERRUPTIBLE);
3233         }
3234         __set_current_state(TASK_RUNNING);
3235
3236         return 0;
3237 }
3238
3239 /*
3240  * Do various things that may trigger events.
3241  */
3242 static __init void event_test_stuff(void)
3243 {
3244         struct task_struct *test_thread;
3245
3246         test_thread = kthread_run(event_test_thread, NULL, "test-events");
3247         msleep(1);
3248         kthread_stop(test_thread);
3249 }
3250
3251 /*
3252  * For every trace event defined, we will test each trace point separately,
3253  * and then by groups, and finally all trace points.
3254  */
3255 static __init void event_trace_self_tests(void)
3256 {
3257         struct trace_subsystem_dir *dir;
3258         struct trace_event_file *file;
3259         struct trace_event_call *call;
3260         struct event_subsystem *system;
3261         struct trace_array *tr;
3262         int ret;
3263
3264         tr = top_trace_array();
3265         if (!tr)
3266                 return;
3267
3268         pr_info("Running tests on trace events:\n");
3269
3270         list_for_each_entry(file, &tr->events, list) {
3271
3272                 call = file->event_call;
3273
3274                 /* Only test those that have a probe */
3275                 if (!call->class || !call->class->probe)
3276                         continue;
3277
3278 /*
3279  * Testing syscall events here is pretty useless, but we still
3280  * do it if configured. It is time consuming, though; what we
3281  * really need is a user thread to perform the syscalls as we
3282  * test.
3283  */
3284 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3285                 if (call->class->system &&
3286                     strcmp(call->class->system, "syscalls") == 0)
3287                         continue;
3288 #endif
3289
3290                 pr_info("Testing event %s: ", trace_event_name(call));
3291
3292                 /*
3293                  * If an event is already enabled, someone is using
3294                  * it and the self test should not be on.
3295                  */
3296                 if (file->flags & EVENT_FILE_FL_ENABLED) {
3297                         pr_warn("Enabled event during self test!\n");
3298                         WARN_ON_ONCE(1);
3299                         continue;
3300                 }
3301
3302                 ftrace_event_enable_disable(file, 1);
3303                 event_test_stuff();
3304                 ftrace_event_enable_disable(file, 0);
3305
3306                 pr_cont("OK\n");
3307         }
3308
3309         /* Now test at the sub system level */
3310
3311         pr_info("Running tests on trace event systems:\n");
3312
3313         list_for_each_entry(dir, &tr->systems, list) {
3314
3315                 system = dir->subsystem;
3316
3317                 /* the ftrace system is special, skip it */
3318                 if (strcmp(system->name, "ftrace") == 0)
3319                         continue;
3320
3321                 pr_info("Testing event system %s: ", system->name);
3322
3323                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3324                 if (WARN_ON_ONCE(ret)) {
3325                         pr_warn("error enabling system %s\n",
3326                                 system->name);
3327                         continue;
3328                 }
3329
3330                 event_test_stuff();
3331
3332                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3333                 if (WARN_ON_ONCE(ret)) {
3334                         pr_warn("error disabling system %s\n",
3335                                 system->name);
3336                         continue;
3337                 }
3338
3339                 pr_cont("OK\n");
3340         }
3341
3342         /* Test with all events enabled */
3343
3344         pr_info("Running tests on all trace events:\n");
3345         pr_info("Testing all events: ");
3346
3347         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3348         if (WARN_ON_ONCE(ret)) {
3349                 pr_warn("error enabling all events\n");
3350                 return;
3351         }
3352
3353         event_test_stuff();
3354
3355         /* Disable all events again */
3356         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3357         if (WARN_ON_ONCE(ret)) {
3358                 pr_warn("error disabling all events\n");
3359                 return;
3360         }
3361
3362         pr_cont("OK\n");
3363 }
3364
3365 #ifdef CONFIG_FUNCTION_TRACER
3366
3367 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3368
3369 static struct trace_event_file event_trace_file __initdata;
3370
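     /*
      * ftrace callback used while the self tests run with the function
      * tracer enabled: for every traced function it writes a TRACE_FN
      * entry through the normal event buffer path. The per-CPU
      * ftrace_test_event_disable counter guards against recursion.
      */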
3371 static void __init
3372 function_test_events_call(unsigned long ip, unsigned long parent_ip,
3373                           struct ftrace_ops *op, struct pt_regs *pt_regs)
3374 {
3375         struct ring_buffer_event *event;
3376         struct ring_buffer *buffer;
3377         struct ftrace_entry *entry;
3378         unsigned long flags;
3379         long disabled;
3380         int cpu;
3381         int pc;
3382
3383         pc = preempt_count();
3384         preempt_disable_notrace();
3385         cpu = raw_smp_processor_id();
3386         disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3387
3388         if (disabled != 1)
3389                 goto out;
3390
3391         local_save_flags(flags);
3392
3393         event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
3394                                                 TRACE_FN, sizeof(*entry),
3395                                                 flags, pc);
3396         if (!event)
3397                 goto out;
3398         entry   = ring_buffer_event_data(event);
3399         entry->ip                       = ip;
3400         entry->parent_ip                = parent_ip;
3401
3402         event_trigger_unlock_commit(&event_trace_file, buffer, event,
3403                                     entry, flags, pc);
3404  out:
3405         atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3406         preempt_enable_notrace();
3407 }
3408
3409 static struct ftrace_ops trace_ops __initdata =
3410 {
3411         .func = function_test_events_call,
3412         .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3413 };
3414
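     /*
      * Re-run the event self tests with the callback above registered as
      * a function tracer, so the interaction between function tracing and
      * event tracing is exercised as well.
      */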
3415 static __init void event_trace_self_test_with_function(void)
3416 {
3417         int ret;
3418
3419         event_trace_file.tr = top_trace_array();
3420         if (WARN_ON(!event_trace_file.tr))
3421                 return;
3422
3423         ret = register_ftrace_function(&trace_ops);
3424         if (WARN_ON(ret < 0)) {
3425                 pr_info("Failed to enable function tracer for event tests\n");
3426                 return;
3427         }
3428         pr_info("Running tests again, along with the function tracer\n");
3429         event_trace_self_tests();
3430         unregister_ftrace_function(&trace_ops);
3431 }
3432 #else
3433 static __init void event_trace_self_test_with_function(void)
3434 {
3435 }
3436 #endif
3437
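     /*
      * Run the self tests from a late initcall, unless they have been
      * disabled (tracing_selftest_disabled).
      */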
3438 static __init int event_trace_self_tests_init(void)
3439 {
3440         if (!tracing_selftest_disabled) {
3441                 event_trace_self_tests();
3442                 event_trace_self_test_with_function();
3443         }
3444
3445         return 0;
3446 }
3447
3448 late_initcall(event_trace_self_tests_init);
3449
3450 #endif