GNU Linux-libre 4.14.254-gnu1
[releases.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will lurk into the ring-buffer to count the
58  * entries inserted during the selftest although some concurrent
59  * insertions into the ring-buffer such as trace_printk could occurred
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputing it to a
109  * serial console.
110  *
111  * It is default off, but you can enable it with either specifying
112  * "ftrace_dump_on_oops" in the kernel command line, or setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered oops
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid already is +1 of the actual prevous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate a new array. The write is an all or nothing
492          * operation. Always create a new array when adding new pids by
493          * the user. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list) {
498                 trace_parser_put(&parser);
499                 return -ENOMEM;
500         }
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 trace_parser_put(&parser);
511                 kfree(pid_list);
512                 return -ENOMEM;
513         }
514
515         if (filtered_pids) {
516                 /* copy the current bits to the new max */
517                 for_each_set_bit(pid, filtered_pids->pids,
518                                  filtered_pids->pid_max) {
519                         set_bit(pid, pid_list->pids);
520                         nr_pids++;
521                 }
522         }
523
524         while (cnt > 0) {
525
526                 pos = 0;
527
528                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
529                 if (ret < 0 || !trace_parser_loaded(&parser))
530                         break;
531
532                 read += ret;
533                 ubuf += ret;
534                 cnt -= ret;
535
536                 parser.buffer[parser.idx] = 0;
537
538                 ret = -EINVAL;
539                 if (kstrtoul(parser.buffer, 0, &val))
540                         break;
541                 if (val >= pid_list->pid_max)
542                         break;
543
544                 pid = (pid_t)val;
545
546                 set_bit(pid, pid_list->pids);
547                 nr_pids++;
548
549                 trace_parser_clear(&parser);
550                 ret = 0;
551         }
552         trace_parser_put(&parser);
553
554         if (ret < 0) {
555                 trace_free_pid_list(pid_list);
556                 return ret;
557         }
558
559         if (!nr_pids) {
560                 /* Cleared the list of pids */
561                 trace_free_pid_list(pid_list);
562                 read = ret;
563                 pid_list = NULL;
564         }
565
566         *new_pid_list = pid_list;
567
568         return read;
569 }
570
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573         u64 ts;
574
575         /* Early boot up does not have a buffer yet */
576         if (!buf->buffer)
577                 return trace_clock_local();
578
579         ts = ring_buffer_time_stamp(buf->buffer, cpu);
580         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582         return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled" to be used in fast paths such as for
595  * the irqsoff tracer. But it may be inaccurate due to races. If you
596  * need to know the accurate state, use tracing_is_on() which is a little
597  * slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601         /*
602          * For quick access (irqsoff uses this in fast path), just
603          * return the mirror variable of the state of the ring buffer.
604          * It's a little racy, but we don't really care.
605          */
606         smp_rmb();
607         return !global_trace.buffer_disabled;
608 }
609
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low number of 16384.
616  * If the dump on oops happens, it will be much appreciated
617  * to not have to wait for all that output. Anyway this can be
618  * boot time and run time configurable.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer            *trace_types __read_mostly;
626
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633  * serialize the access of the ring buffer
634  *
635  * ring buffer serializes readers, but it is low level protection.
636  * The validity of the events (which returns by ring_buffer_peek() ..etc)
637  * are not protected by ring buffer.
638  *
639  * The content of events may become garbage if we allow other process consumes
640  * these events concurrently:
641  *   A) the page of the consumed events may become a normal page
642  *      (not reader page) in ring buffer, and this page will be rewrited
643  *      by events producer.
644  *   B) The page of the consumed events may become a page for splice_read,
645  *      and this page will be returned to system.
646  *
647  * These primitives allow multi process access to different cpu ring buffer
648  * concurrently.
649  *
650  * These primitives don't distinguish read-only and read-consume access.
651  * Multi read-only access are also serialized.
652  */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660         if (cpu == RING_BUFFER_ALL_CPUS) {
661                 /* gain it for accessing the whole ring buffer. */
662                 down_write(&all_cpu_access_lock);
663         } else {
664                 /* gain it for accessing a cpu ring buffer. */
665
666                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667                 down_read(&all_cpu_access_lock);
668
669                 /* Secondly block other access to this @cpu ring buffer. */
670                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671         }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676         if (cpu == RING_BUFFER_ALL_CPUS) {
677                 up_write(&all_cpu_access_lock);
678         } else {
679                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680                 up_read(&all_cpu_access_lock);
681         }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686         int cpu;
687
688         for_each_possible_cpu(cpu)
689                 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698         (void)cpu;
699         mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704         (void)cpu;
705         mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
713
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716                                  unsigned long flags,
717                                  int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719                                       struct ring_buffer *buffer,
720                                       unsigned long flags,
721                                       int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725                                         unsigned long flags,
726                                         int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730                                       struct ring_buffer *buffer,
731                                       unsigned long flags,
732                                       int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740                   int type, unsigned long flags, int pc)
741 {
742         struct trace_entry *ent = ring_buffer_event_data(event);
743
744         tracing_generic_entry_update(ent, flags, pc);
745         ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750                           int type,
751                           unsigned long len,
752                           unsigned long flags, int pc)
753 {
754         struct ring_buffer_event *event;
755
756         event = ring_buffer_lock_reserve(buffer, len);
757         if (event != NULL)
758                 trace_event_setup(event, type, flags, pc);
759
760         return event;
761 }
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765         if (tr->trace_buffer.buffer)
766                 ring_buffer_record_on(tr->trace_buffer.buffer);
767         /*
768          * This flag is looked at when buffers haven't been allocated
769          * yet, or by some tracers (like irqsoff), that just want to
770          * know if the ring buffer has been disabled, but it can handle
771          * races of where it gets disabled but we still do a record.
772          * As the check is in the fast path of the tracers, it is more
773          * important to be fast than accurate.
774          */
775         tr->buffer_disabled = 0;
776         /* Make the flag seen by readers */
777         smp_wmb();
778 }
779
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788         tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796         __this_cpu_write(trace_taskinfo_save, true);
797
798         /* If this is the temp buffer, we need to commit fully */
799         if (this_cpu_read(trace_buffered_event) == event) {
800                 /* Length is in event->array[0] */
801                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802                 /* Release the temp buffer */
803                 this_cpu_dec(trace_buffered_event_cnt);
804         } else
805                 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:    The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816         struct ring_buffer_event *event;
817         struct ring_buffer *buffer;
818         struct print_entry *entry;
819         unsigned long irq_flags;
820         int alloc;
821         int pc;
822
823         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824                 return 0;
825
826         pc = preempt_count();
827
828         if (unlikely(tracing_selftest_running || tracing_disabled))
829                 return 0;
830
831         alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833         local_save_flags(irq_flags);
834         buffer = global_trace.trace_buffer.buffer;
835         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
836                                             irq_flags, pc);
837         if (!event)
838                 return 0;
839
840         entry = ring_buffer_event_data(event);
841         entry->ip = ip;
842
843         memcpy(&entry->buf, str, size);
844
845         /* Add a newline if necessary */
846         if (entry->buf[size - 1] != '\n') {
847                 entry->buf[size] = '\n';
848                 entry->buf[size + 1] = '\0';
849         } else
850                 entry->buf[size] = '\0';
851
852         __buffer_unlock_commit(buffer, event);
853         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855         return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:    The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866         struct ring_buffer_event *event;
867         struct ring_buffer *buffer;
868         struct bputs_entry *entry;
869         unsigned long irq_flags;
870         int size = sizeof(struct bputs_entry);
871         int pc;
872
873         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874                 return 0;
875
876         pc = preempt_count();
877
878         if (unlikely(tracing_selftest_running || tracing_disabled))
879                 return 0;
880
881         local_save_flags(irq_flags);
882         buffer = global_trace.trace_buffer.buffer;
883         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884                                             irq_flags, pc);
885         if (!event)
886                 return 0;
887
888         entry = ring_buffer_event_data(event);
889         entry->ip                       = ip;
890         entry->str                      = str;
891
892         __buffer_unlock_commit(buffer, event);
893         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895         return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902         struct tracer *tracer = tr->current_trace;
903         unsigned long flags;
904
905         if (in_nmi()) {
906                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907                 internal_trace_puts("*** snapshot is being ignored        ***\n");
908                 return;
909         }
910
911         if (!tr->allocated_snapshot) {
912                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913                 internal_trace_puts("*** stopping trace here!   ***\n");
914                 tracing_off();
915                 return;
916         }
917
918         /* Note, snapshot can not be used when the tracer uses it */
919         if (tracer->use_max_tr) {
920                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922                 return;
923         }
924
925         local_irq_save(flags);
926         update_max_tr(tr, current, smp_processor_id());
927         local_irq_restore(flags);
928 }
929
930 /**
931  * trace_snapshot - take a snapshot of the current buffer.
932  *
933  * This causes a swap between the snapshot buffer and the current live
934  * tracing buffer. You can use this to take snapshots of the live
935  * trace when some condition is triggered, but continue to trace.
936  *
937  * Note, make sure to allocate the snapshot with either
938  * a tracing_snapshot_alloc(), or by doing it manually
939  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
940  *
941  * If the snapshot buffer is not allocated, it will stop tracing.
942  * Basically making a permanent snapshot.
943  */
944 void tracing_snapshot(void)
945 {
946         struct trace_array *tr = &global_trace;
947
948         tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
951
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953                                         struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958         int ret;
959
960         if (!tr->allocated_snapshot) {
961
962                 /* allocate spare buffer */
963                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
964                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965                 if (ret < 0)
966                         return ret;
967
968                 tr->allocated_snapshot = true;
969         }
970
971         return 0;
972 }
973
974 static void free_snapshot(struct trace_array *tr)
975 {
976         /*
977          * We don't free the ring buffer. instead, resize it because
978          * The max_tr ring buffer has some state (e.g. ring->clock) and
979          * we want preserve it.
980          */
981         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982         set_buffer_entries(&tr->max_buffer, 1);
983         tracing_reset_online_cpus(&tr->max_buffer);
984         tr->allocated_snapshot = false;
985 }
986
987 /**
988  * tracing_alloc_snapshot - allocate snapshot buffer.
989  *
990  * This only allocates the snapshot buffer if it isn't already
991  * allocated - it doesn't also take a snapshot.
992  *
993  * This is meant to be used in cases where the snapshot buffer needs
994  * to be set up for events that can't sleep but need to be able to
995  * trigger a snapshot.
996  */
997 int tracing_alloc_snapshot(void)
998 {
999         struct trace_array *tr = &global_trace;
1000         int ret;
1001
1002         ret = tracing_alloc_snapshot_instance(tr);
1003         WARN_ON(ret < 0);
1004
1005         return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008
1009 /**
1010  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011  *
1012  * This is similar to trace_snapshot(), but it will allocate the
1013  * snapshot buffer if it isn't already allocated. Use this only
1014  * where it is safe to sleep, as the allocation may sleep.
1015  *
1016  * This causes a swap between the snapshot buffer and the current live
1017  * tracing buffer. You can use this to take snapshots of the live
1018  * trace when some condition is triggered, but continue to trace.
1019  */
1020 void tracing_snapshot_alloc(void)
1021 {
1022         int ret;
1023
1024         ret = tracing_alloc_snapshot();
1025         if (ret < 0)
1026                 return;
1027
1028         tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040         return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045         /* Give warning */
1046         tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053         if (tr->trace_buffer.buffer)
1054                 ring_buffer_record_off(tr->trace_buffer.buffer);
1055         /*
1056          * This flag is looked at when buffers haven't been allocated
1057          * yet, or by some tracers (like irqsoff), that just want to
1058          * know if the ring buffer has been disabled, but it can handle
1059          * races of where it gets disabled but we still do a record.
1060          * As the check is in the fast path of the tracers, it is more
1061          * important to be fast than accurate.
1062          */
1063         tr->buffer_disabled = 1;
1064         /* Make the flag seen by readers */
1065         smp_wmb();
1066 }
1067
1068 /**
1069  * tracing_off - turn off tracing buffers
1070  *
1071  * This function stops the tracing buffers from recording data.
1072  * It does not disable any overhead the tracers themselves may
1073  * be causing. This function simply causes all recording to
1074  * the ring buffers to fail.
1075  */
1076 void tracing_off(void)
1077 {
1078         tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081
1082 void disable_trace_on_warning(void)
1083 {
1084         if (__disable_trace_on_warning)
1085                 tracing_off();
1086 }
1087
1088 /**
1089  * tracer_tracing_is_on - show real state of ring buffer enabled
1090  * @tr : the trace array to know if ring buffer is enabled
1091  *
1092  * Shows real state of the ring buffer if it is enabled or not.
1093  */
1094 int tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096         if (tr->trace_buffer.buffer)
1097                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098         return !tr->buffer_disabled;
1099 }
1100
1101 /**
1102  * tracing_is_on - show state of ring buffers enabled
1103  */
1104 int tracing_is_on(void)
1105 {
1106         return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109
1110 static int __init set_buf_size(char *str)
1111 {
1112         unsigned long buf_size;
1113
1114         if (!str)
1115                 return 0;
1116         buf_size = memparse(str, &str);
1117         /* nr_entries can not be zero */
1118         if (buf_size == 0)
1119                 return 0;
1120         trace_buf_size = buf_size;
1121         return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127         unsigned long threshold;
1128         int ret;
1129
1130         if (!str)
1131                 return 0;
1132         ret = kstrtoul(str, 0, &threshold);
1133         if (ret < 0)
1134                 return 0;
1135         tracing_thresh = threshold * 1000;
1136         return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142         return nsecs / 1000;
1143 }
1144
1145 /*
1146  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149  * of strings in the order that the evals (enum) were defined.
1150  */
1151 #undef C
1152 #define C(a, b) b
1153
1154 /* These must match the bit postions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156         TRACE_FLAGS
1157         NULL
1158 };
1159
1160 static struct {
1161         u64 (*func)(void);
1162         const char *name;
1163         int in_ns;              /* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165         { trace_clock_local,            "local",        1 },
1166         { trace_clock_global,           "global",       1 },
1167         { trace_clock_counter,          "counter",      0 },
1168         { trace_clock_jiffies,          "uptime",       0 },
1169         { trace_clock,                  "perf",         1 },
1170         { ktime_get_mono_fast_ns,       "mono",         1 },
1171         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1172         { ktime_get_boot_fast_ns,       "boot",         1 },
1173         ARCH_TRACE_CLOCKS
1174 };
1175
1176 /*
1177  * trace_parser_get_init - gets the buffer for trace parser
1178  */
1179 int trace_parser_get_init(struct trace_parser *parser, int size)
1180 {
1181         memset(parser, 0, sizeof(*parser));
1182
1183         parser->buffer = kmalloc(size, GFP_KERNEL);
1184         if (!parser->buffer)
1185                 return 1;
1186
1187         parser->size = size;
1188         return 0;
1189 }
1190
1191 /*
1192  * trace_parser_put - frees the buffer for trace parser
1193  */
1194 void trace_parser_put(struct trace_parser *parser)
1195 {
1196         kfree(parser->buffer);
1197         parser->buffer = NULL;
1198 }
1199
1200 /*
1201  * trace_get_user - reads the user input string separated by  space
1202  * (matched by isspace(ch))
1203  *
1204  * For each string found the 'struct trace_parser' is updated,
1205  * and the function returns.
1206  *
1207  * Returns number of bytes read.
1208  *
1209  * See kernel/trace/trace.h for 'struct trace_parser' details.
1210  */
1211 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1212         size_t cnt, loff_t *ppos)
1213 {
1214         char ch;
1215         size_t read = 0;
1216         ssize_t ret;
1217
1218         if (!*ppos)
1219                 trace_parser_clear(parser);
1220
1221         ret = get_user(ch, ubuf++);
1222         if (ret)
1223                 goto out;
1224
1225         read++;
1226         cnt--;
1227
1228         /*
1229          * The parser is not finished with the last write,
1230          * continue reading the user input without skipping spaces.
1231          */
1232         if (!parser->cont) {
1233                 /* skip white space */
1234                 while (cnt && isspace(ch)) {
1235                         ret = get_user(ch, ubuf++);
1236                         if (ret)
1237                                 goto out;
1238                         read++;
1239                         cnt--;
1240                 }
1241
1242                 /* only spaces were written */
1243                 if (isspace(ch)) {
1244                         *ppos += read;
1245                         ret = read;
1246                         goto out;
1247                 }
1248
1249                 parser->idx = 0;
1250         }
1251
1252         /* read the non-space input */
1253         while (cnt && !isspace(ch)) {
1254                 if (parser->idx < parser->size - 1)
1255                         parser->buffer[parser->idx++] = ch;
1256                 else {
1257                         ret = -EINVAL;
1258                         goto out;
1259                 }
1260                 ret = get_user(ch, ubuf++);
1261                 if (ret)
1262                         goto out;
1263                 read++;
1264                 cnt--;
1265         }
1266
1267         /* We either got finished input or we have to wait for another call. */
1268         if (isspace(ch)) {
1269                 parser->buffer[parser->idx] = 0;
1270                 parser->cont = false;
1271         } else if (parser->idx < parser->size - 1) {
1272                 parser->cont = true;
1273                 parser->buffer[parser->idx++] = ch;
1274         } else {
1275                 ret = -EINVAL;
1276                 goto out;
1277         }
1278
1279         *ppos += read;
1280         ret = read;
1281
1282 out:
1283         return ret;
1284 }
1285
1286 /* TODO add a seq_buf_to_buffer() */
1287 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1288 {
1289         int len;
1290
1291         if (trace_seq_used(s) <= s->seq.readpos)
1292                 return -EBUSY;
1293
1294         len = trace_seq_used(s) - s->seq.readpos;
1295         if (cnt > len)
1296                 cnt = len;
1297         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1298
1299         s->seq.readpos += cnt;
1300         return cnt;
1301 }
1302
1303 unsigned long __read_mostly     tracing_thresh;
1304
1305 #ifdef CONFIG_TRACER_MAX_TRACE
1306 /*
1307  * Copy the new maximum trace into the separate maximum-trace
1308  * structure. (this way the maximum trace is permanently saved,
1309  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1310  */
1311 static void
1312 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1313 {
1314         struct trace_buffer *trace_buf = &tr->trace_buffer;
1315         struct trace_buffer *max_buf = &tr->max_buffer;
1316         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1317         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1318
1319         max_buf->cpu = cpu;
1320         max_buf->time_start = data->preempt_timestamp;
1321
1322         max_data->saved_latency = tr->max_latency;
1323         max_data->critical_start = data->critical_start;
1324         max_data->critical_end = data->critical_end;
1325
1326         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1327         max_data->pid = tsk->pid;
1328         /*
1329          * If tsk == current, then use current_uid(), as that does not use
1330          * RCU. The irq tracer can be called out of RCU scope.
1331          */
1332         if (tsk == current)
1333                 max_data->uid = current_uid();
1334         else
1335                 max_data->uid = task_uid(tsk);
1336
1337         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1338         max_data->policy = tsk->policy;
1339         max_data->rt_priority = tsk->rt_priority;
1340
1341         /* record this tasks comm */
1342         tracing_record_cmdline(tsk);
1343 }
1344
1345 /**
1346  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1347  * @tr: tracer
1348  * @tsk: the task with the latency
1349  * @cpu: The cpu that initiated the trace.
1350  *
1351  * Flip the buffers between the @tr and the max_tr and record information
1352  * about which task was the cause of this latency.
1353  */
1354 void
1355 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1356 {
1357         struct ring_buffer *buf;
1358
1359         if (tr->stop_count)
1360                 return;
1361
1362         WARN_ON_ONCE(!irqs_disabled());
1363
1364         if (!tr->allocated_snapshot) {
1365                 /* Only the nop tracer should hit this when disabling */
1366                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1367                 return;
1368         }
1369
1370         arch_spin_lock(&tr->max_lock);
1371
1372         /* Inherit the recordable setting from trace_buffer */
1373         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1374                 ring_buffer_record_on(tr->max_buffer.buffer);
1375         else
1376                 ring_buffer_record_off(tr->max_buffer.buffer);
1377
1378         buf = tr->trace_buffer.buffer;
1379         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380         tr->max_buffer.buffer = buf;
1381
1382         __update_max_tr(tr, tsk, cpu);
1383         arch_spin_unlock(&tr->max_lock);
1384 }
1385
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr - tracer
1389  * @tsk - task with the latency
1390  * @cpu - the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397         int ret;
1398
1399         if (tr->stop_count)
1400                 return;
1401
1402         WARN_ON_ONCE(!irqs_disabled());
1403         if (!tr->allocated_snapshot) {
1404                 /* Only the nop tracer should hit this when disabling */
1405                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406                 return;
1407         }
1408
1409         arch_spin_lock(&tr->max_lock);
1410
1411         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412
1413         if (ret == -EBUSY) {
1414                 /*
1415                  * We failed to swap the buffer due to a commit taking
1416                  * place on this CPU. We fail to record, but we reset
1417                  * the max trace buffer (no one writes directly to it)
1418                  * and flag that it failed.
1419                  */
1420                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421                         "Failed to swap buffers due to commit in progress\n");
1422         }
1423
1424         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425
1426         __update_max_tr(tr, tsk, cpu);
1427         arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433         /* Iterators are static, they should be filled or empty */
1434         if (trace_buffer_iter(iter, iter->cpu_file))
1435                 return 0;
1436
1437         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438                                 full);
1439 }
1440
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443
1444 struct trace_selftests {
1445         struct list_head                list;
1446         struct tracer                   *type;
1447 };
1448
1449 static LIST_HEAD(postponed_selftests);
1450
1451 static int save_selftest(struct tracer *type)
1452 {
1453         struct trace_selftests *selftest;
1454
1455         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456         if (!selftest)
1457                 return -ENOMEM;
1458
1459         selftest->type = type;
1460         list_add(&selftest->list, &postponed_selftests);
1461         return 0;
1462 }
1463
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466         struct trace_array *tr = &global_trace;
1467         struct tracer *saved_tracer = tr->current_trace;
1468         int ret;
1469
1470         if (!type->selftest || tracing_selftest_disabled)
1471                 return 0;
1472
1473         /*
1474          * If a tracer registers early in boot up (before scheduling is
1475          * initialized and such), then do not run its selftests yet.
1476          * Instead, run it a little later in the boot process.
1477          */
1478         if (!selftests_can_run)
1479                 return save_selftest(type);
1480
1481         /*
1482          * Run a selftest on this tracer.
1483          * Here we reset the trace buffer, and set the current
1484          * tracer to be this tracer. The tracer can then run some
1485          * internal tracing to verify that everything is in order.
1486          * If we fail, we do not register this tracer.
1487          */
1488         tracing_reset_online_cpus(&tr->trace_buffer);
1489
1490         tr->current_trace = type;
1491
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493         if (type->use_max_tr) {
1494                 /* If we expanded the buffers, make sure the max is expanded too */
1495                 if (ring_buffer_expanded)
1496                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497                                            RING_BUFFER_ALL_CPUS);
1498                 tr->allocated_snapshot = true;
1499         }
1500 #endif
1501
1502         /* the test is responsible for initializing and enabling */
1503         pr_info("Testing tracer %s: ", type->name);
1504         ret = type->selftest(type, tr);
1505         /* the test is responsible for resetting too */
1506         tr->current_trace = saved_tracer;
1507         if (ret) {
1508                 printk(KERN_CONT "FAILED!\n");
1509                 /* Add the warning after printing 'FAILED' */
1510                 WARN_ON(1);
1511                 return -1;
1512         }
1513         /* Only reset on passing, to avoid touching corrupted buffers */
1514         tracing_reset_online_cpus(&tr->trace_buffer);
1515
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517         if (type->use_max_tr) {
1518                 tr->allocated_snapshot = false;
1519
1520                 /* Shrink the max buffer again */
1521                 if (ring_buffer_expanded)
1522                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1523                                            RING_BUFFER_ALL_CPUS);
1524         }
1525 #endif
1526
1527         printk(KERN_CONT "PASSED\n");
1528         return 0;
1529 }
1530
1531 static __init int init_trace_selftests(void)
1532 {
1533         struct trace_selftests *p, *n;
1534         struct tracer *t, **last;
1535         int ret;
1536
1537         selftests_can_run = true;
1538
1539         mutex_lock(&trace_types_lock);
1540
1541         if (list_empty(&postponed_selftests))
1542                 goto out;
1543
1544         pr_info("Running postponed tracer tests:\n");
1545
1546         tracing_selftest_running = true;
1547         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1548                 ret = run_tracer_selftest(p->type);
1549                 /* If the test fails, then warn and remove from available_tracers */
1550                 if (ret < 0) {
1551                         WARN(1, "tracer: %s failed selftest, disabling\n",
1552                              p->type->name);
1553                         last = &trace_types;
1554                         for (t = trace_types; t; t = t->next) {
1555                                 if (t == p->type) {
1556                                         *last = t->next;
1557                                         break;
1558                                 }
1559                                 last = &t->next;
1560                         }
1561                 }
1562                 list_del(&p->list);
1563                 kfree(p);
1564         }
1565         tracing_selftest_running = false;
1566
1567  out:
1568         mutex_unlock(&trace_types_lock);
1569
1570         return 0;
1571 }
1572 core_initcall(init_trace_selftests);
1573 #else
1574 static inline int run_tracer_selftest(struct tracer *type)
1575 {
1576         return 0;
1577 }
1578 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1579
1580 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1581
1582 static void __init apply_trace_boot_options(void);
1583
1584 /**
1585  * register_tracer - register a tracer with the ftrace system.
1586  * @type - the plugin for the tracer
1587  *
1588  * Register a new plugin tracer.
1589  */
1590 int __init register_tracer(struct tracer *type)
1591 {
1592         struct tracer *t;
1593         int ret = 0;
1594
1595         if (!type->name) {
1596                 pr_info("Tracer must have a name\n");
1597                 return -1;
1598         }
1599
1600         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1601                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1602                 return -1;
1603         }
1604
1605         mutex_lock(&trace_types_lock);
1606
1607         tracing_selftest_running = true;
1608
1609         for (t = trace_types; t; t = t->next) {
1610                 if (strcmp(type->name, t->name) == 0) {
1611                         /* already found */
1612                         pr_info("Tracer %s already registered\n",
1613                                 type->name);
1614                         ret = -1;
1615                         goto out;
1616                 }
1617         }
1618
1619         if (!type->set_flag)
1620                 type->set_flag = &dummy_set_flag;
1621         if (!type->flags) {
1622                 /*allocate a dummy tracer_flags*/
1623                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1624                 if (!type->flags) {
1625                         ret = -ENOMEM;
1626                         goto out;
1627                 }
1628                 type->flags->val = 0;
1629                 type->flags->opts = dummy_tracer_opt;
1630         } else
1631                 if (!type->flags->opts)
1632                         type->flags->opts = dummy_tracer_opt;
1633
1634         /* store the tracer for __set_tracer_option */
1635         type->flags->trace = type;
1636
1637         ret = run_tracer_selftest(type);
1638         if (ret < 0)
1639                 goto out;
1640
1641         type->next = trace_types;
1642         trace_types = type;
1643         add_tracer_options(&global_trace, type);
1644
1645  out:
1646         tracing_selftest_running = false;
1647         mutex_unlock(&trace_types_lock);
1648
1649         if (ret || !default_bootup_tracer)
1650                 goto out_unlock;
1651
1652         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1653                 goto out_unlock;
1654
1655         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1656         /* Do we want this tracer to start on bootup? */
1657         tracing_set_tracer(&global_trace, type->name);
1658         default_bootup_tracer = NULL;
1659
1660         apply_trace_boot_options();
1661
1662         /* disable other selftests, since this will break it. */
1663         tracing_selftest_disabled = true;
1664 #ifdef CONFIG_FTRACE_STARTUP_TEST
1665         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1666                type->name);
1667 #endif
1668
1669  out_unlock:
1670         return ret;
1671 }
1672
1673 void tracing_reset(struct trace_buffer *buf, int cpu)
1674 {
1675         struct ring_buffer *buffer = buf->buffer;
1676
1677         if (!buffer)
1678                 return;
1679
1680         ring_buffer_record_disable(buffer);
1681
1682         /* Make sure all commits have finished */
1683         synchronize_sched();
1684         ring_buffer_reset_cpu(buffer, cpu);
1685
1686         ring_buffer_record_enable(buffer);
1687 }
1688
1689 void tracing_reset_online_cpus(struct trace_buffer *buf)
1690 {
1691         struct ring_buffer *buffer = buf->buffer;
1692         int cpu;
1693
1694         if (!buffer)
1695                 return;
1696
1697         ring_buffer_record_disable(buffer);
1698
1699         /* Make sure all commits have finished */
1700         synchronize_sched();
1701
1702         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1703
1704         for_each_online_cpu(cpu)
1705                 ring_buffer_reset_cpu(buffer, cpu);
1706
1707         ring_buffer_record_enable(buffer);
1708 }
1709
1710 /* Must have trace_types_lock held */
1711 void tracing_reset_all_online_cpus(void)
1712 {
1713         struct trace_array *tr;
1714
1715         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1716                 if (!tr->clear_trace)
1717                         continue;
1718                 tr->clear_trace = false;
1719                 tracing_reset_online_cpus(&tr->trace_buffer);
1720 #ifdef CONFIG_TRACER_MAX_TRACE
1721                 tracing_reset_online_cpus(&tr->max_buffer);
1722 #endif
1723         }
1724 }
1725
1726 static int *tgid_map;
1727
1728 #define SAVED_CMDLINES_DEFAULT 128
1729 #define NO_CMDLINE_MAP UINT_MAX
1730 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1731 struct saved_cmdlines_buffer {
1732         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1733         unsigned *map_cmdline_to_pid;
1734         unsigned cmdline_num;
1735         int cmdline_idx;
1736         char *saved_cmdlines;
1737 };
1738 static struct saved_cmdlines_buffer *savedcmd;
1739
1740 static inline char *get_saved_cmdlines(int idx)
1741 {
1742         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1743 }
1744
1745 static inline void set_cmdline(int idx, const char *cmdline)
1746 {
1747         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1748 }
1749
1750 static int allocate_cmdlines_buffer(unsigned int val,
1751                                     struct saved_cmdlines_buffer *s)
1752 {
1753         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1754                                         GFP_KERNEL);
1755         if (!s->map_cmdline_to_pid)
1756                 return -ENOMEM;
1757
1758         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1759         if (!s->saved_cmdlines) {
1760                 kfree(s->map_cmdline_to_pid);
1761                 return -ENOMEM;
1762         }
1763
1764         s->cmdline_idx = 0;
1765         s->cmdline_num = val;
1766         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1767                sizeof(s->map_pid_to_cmdline));
1768         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1769                val * sizeof(*s->map_cmdline_to_pid));
1770
1771         return 0;
1772 }
1773
1774 static int trace_create_savedcmd(void)
1775 {
1776         int ret;
1777
1778         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1779         if (!savedcmd)
1780                 return -ENOMEM;
1781
1782         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1783         if (ret < 0) {
1784                 kfree(savedcmd);
1785                 savedcmd = NULL;
1786                 return -ENOMEM;
1787         }
1788
1789         return 0;
1790 }
1791
1792 int is_tracing_stopped(void)
1793 {
1794         return global_trace.stop_count;
1795 }
1796
1797 /**
1798  * tracing_start - quick start of the tracer
1799  *
1800  * If tracing is enabled but was stopped by tracing_stop,
1801  * this will start the tracer back up.
1802  */
1803 void tracing_start(void)
1804 {
1805         struct ring_buffer *buffer;
1806         unsigned long flags;
1807
1808         if (tracing_disabled)
1809                 return;
1810
1811         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1812         if (--global_trace.stop_count) {
1813                 if (global_trace.stop_count < 0) {
1814                         /* Someone screwed up their debugging */
1815                         WARN_ON_ONCE(1);
1816                         global_trace.stop_count = 0;
1817                 }
1818                 goto out;
1819         }
1820
1821         /* Prevent the buffers from switching */
1822         arch_spin_lock(&global_trace.max_lock);
1823
1824         buffer = global_trace.trace_buffer.buffer;
1825         if (buffer)
1826                 ring_buffer_record_enable(buffer);
1827
1828 #ifdef CONFIG_TRACER_MAX_TRACE
1829         buffer = global_trace.max_buffer.buffer;
1830         if (buffer)
1831                 ring_buffer_record_enable(buffer);
1832 #endif
1833
1834         arch_spin_unlock(&global_trace.max_lock);
1835
1836  out:
1837         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1838 }
1839
1840 static void tracing_start_tr(struct trace_array *tr)
1841 {
1842         struct ring_buffer *buffer;
1843         unsigned long flags;
1844
1845         if (tracing_disabled)
1846                 return;
1847
1848         /* If global, we need to also start the max tracer */
1849         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1850                 return tracing_start();
1851
1852         raw_spin_lock_irqsave(&tr->start_lock, flags);
1853
1854         if (--tr->stop_count) {
1855                 if (tr->stop_count < 0) {
1856                         /* Someone screwed up their debugging */
1857                         WARN_ON_ONCE(1);
1858                         tr->stop_count = 0;
1859                 }
1860                 goto out;
1861         }
1862
1863         buffer = tr->trace_buffer.buffer;
1864         if (buffer)
1865                 ring_buffer_record_enable(buffer);
1866
1867  out:
1868         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1869 }
1870
1871 /**
1872  * tracing_stop - quick stop of the tracer
1873  *
1874  * Light weight way to stop tracing. Use in conjunction with
1875  * tracing_start.
1876  */
1877 void tracing_stop(void)
1878 {
1879         struct ring_buffer *buffer;
1880         unsigned long flags;
1881
1882         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1883         if (global_trace.stop_count++)
1884                 goto out;
1885
1886         /* Prevent the buffers from switching */
1887         arch_spin_lock(&global_trace.max_lock);
1888
1889         buffer = global_trace.trace_buffer.buffer;
1890         if (buffer)
1891                 ring_buffer_record_disable(buffer);
1892
1893 #ifdef CONFIG_TRACER_MAX_TRACE
1894         buffer = global_trace.max_buffer.buffer;
1895         if (buffer)
1896                 ring_buffer_record_disable(buffer);
1897 #endif
1898
1899         arch_spin_unlock(&global_trace.max_lock);
1900
1901  out:
1902         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1903 }
1904
1905 static void tracing_stop_tr(struct trace_array *tr)
1906 {
1907         struct ring_buffer *buffer;
1908         unsigned long flags;
1909
1910         /* If global, we need to also stop the max tracer */
1911         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1912                 return tracing_stop();
1913
1914         raw_spin_lock_irqsave(&tr->start_lock, flags);
1915         if (tr->stop_count++)
1916                 goto out;
1917
1918         buffer = tr->trace_buffer.buffer;
1919         if (buffer)
1920                 ring_buffer_record_disable(buffer);
1921
1922  out:
1923         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1924 }
1925
1926 static int trace_save_cmdline(struct task_struct *tsk)
1927 {
1928         unsigned tpid, idx;
1929
1930         /* treat recording of idle task as a success */
1931         if (!tsk->pid)
1932                 return 1;
1933
1934         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1935
1936         /*
1937          * It's not the end of the world if we don't get
1938          * the lock, but we also don't want to spin
1939          * nor do we want to disable interrupts,
1940          * so if we miss here, then better luck next time.
1941          */
1942         if (!arch_spin_trylock(&trace_cmdline_lock))
1943                 return 0;
1944
1945         idx = savedcmd->map_pid_to_cmdline[tpid];
1946         if (idx == NO_CMDLINE_MAP) {
1947                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1948
1949                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1950                 savedcmd->cmdline_idx = idx;
1951         }
1952
1953         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1954         set_cmdline(idx, tsk->comm);
1955
1956         arch_spin_unlock(&trace_cmdline_lock);
1957
1958         return 1;
1959 }
1960
1961 static void __trace_find_cmdline(int pid, char comm[])
1962 {
1963         unsigned map;
1964         int tpid;
1965
1966         if (!pid) {
1967                 strcpy(comm, "<idle>");
1968                 return;
1969         }
1970
1971         if (WARN_ON_ONCE(pid < 0)) {
1972                 strcpy(comm, "<XXX>");
1973                 return;
1974         }
1975
1976         tpid = pid & (PID_MAX_DEFAULT - 1);
1977         map = savedcmd->map_pid_to_cmdline[tpid];
1978         if (map != NO_CMDLINE_MAP) {
1979                 tpid = savedcmd->map_cmdline_to_pid[map];
1980                 if (tpid == pid) {
1981                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1982                         return;
1983                 }
1984         }
1985         strcpy(comm, "<...>");
1986 }
1987
1988 void trace_find_cmdline(int pid, char comm[])
1989 {
1990         preempt_disable();
1991         arch_spin_lock(&trace_cmdline_lock);
1992
1993         __trace_find_cmdline(pid, comm);
1994
1995         arch_spin_unlock(&trace_cmdline_lock);
1996         preempt_enable();
1997 }
1998
1999 int trace_find_tgid(int pid)
2000 {
2001         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2002                 return 0;
2003
2004         return tgid_map[pid];
2005 }
2006
2007 static int trace_save_tgid(struct task_struct *tsk)
2008 {
2009         /* treat recording of idle task as a success */
2010         if (!tsk->pid)
2011                 return 1;
2012
2013         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2014                 return 0;
2015
2016         tgid_map[tsk->pid] = tsk->tgid;
2017         return 1;
2018 }
2019
2020 static bool tracing_record_taskinfo_skip(int flags)
2021 {
2022         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2023                 return true;
2024         if (!__this_cpu_read(trace_taskinfo_save))
2025                 return true;
2026         return false;
2027 }
2028
2029 /**
2030  * tracing_record_taskinfo - record the task info of a task
2031  *
2032  * @task  - task to record
2033  * @flags - TRACE_RECORD_CMDLINE for recording comm
2034  *        - TRACE_RECORD_TGID for recording tgid
2035  */
2036 void tracing_record_taskinfo(struct task_struct *task, int flags)
2037 {
2038         bool done;
2039
2040         if (tracing_record_taskinfo_skip(flags))
2041                 return;
2042
2043         /*
2044          * Record as much task information as possible. If some fail, continue
2045          * to try to record the others.
2046          */
2047         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2048         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2049
2050         /* If recording any information failed, retry again soon. */
2051         if (!done)
2052                 return;
2053
2054         __this_cpu_write(trace_taskinfo_save, false);
2055 }
2056
2057 /**
2058  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2059  *
2060  * @prev - previous task during sched_switch
2061  * @next - next task during sched_switch
2062  * @flags - TRACE_RECORD_CMDLINE for recording comm
2063  *          TRACE_RECORD_TGID for recording tgid
2064  */
2065 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2066                                           struct task_struct *next, int flags)
2067 {
2068         bool done;
2069
2070         if (tracing_record_taskinfo_skip(flags))
2071                 return;
2072
2073         /*
2074          * Record as much task information as possible. If some fail, continue
2075          * to try to record the others.
2076          */
2077         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2078         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2079         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2080         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2081
2082         /* If recording any information failed, retry again soon. */
2083         if (!done)
2084                 return;
2085
2086         __this_cpu_write(trace_taskinfo_save, false);
2087 }
2088
2089 /* Helpers to record a specific task information */
2090 void tracing_record_cmdline(struct task_struct *task)
2091 {
2092         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2093 }
2094
2095 void tracing_record_tgid(struct task_struct *task)
2096 {
2097         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2098 }
2099
2100 /*
2101  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2102  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2103  * simplifies those functions and keeps them in sync.
2104  */
2105 enum print_line_t trace_handle_return(struct trace_seq *s)
2106 {
2107         return trace_seq_has_overflowed(s) ?
2108                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2109 }
2110 EXPORT_SYMBOL_GPL(trace_handle_return);
2111
2112 void
2113 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2114                              int pc)
2115 {
2116         struct task_struct *tsk = current;
2117
2118         entry->preempt_count            = pc & 0xff;
2119         entry->pid                      = (tsk) ? tsk->pid : 0;
2120         entry->flags =
2121 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2122                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2123 #else
2124                 TRACE_FLAG_IRQS_NOSUPPORT |
2125 #endif
2126                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2127                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2128                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2129                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2130                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2131 }
2132 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2133
2134 struct ring_buffer_event *
2135 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2136                           int type,
2137                           unsigned long len,
2138                           unsigned long flags, int pc)
2139 {
2140         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2141 }
2142
2143 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2144 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2145 static int trace_buffered_event_ref;
2146
2147 /**
2148  * trace_buffered_event_enable - enable buffering events
2149  *
2150  * When events are being filtered, it is quicker to use a temporary
2151  * buffer to write the event data into if there's a likely chance
2152  * that it will not be committed. The discard of the ring buffer
2153  * is not as fast as committing, and is much slower than copying
2154  * a commit.
2155  *
2156  * When an event is to be filtered, allocate per cpu buffers to
2157  * write the event data into, and if the event is filtered and discarded
2158  * it is simply dropped, otherwise, the entire data is to be committed
2159  * in one shot.
2160  */
2161 void trace_buffered_event_enable(void)
2162 {
2163         struct ring_buffer_event *event;
2164         struct page *page;
2165         int cpu;
2166
2167         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2168
2169         if (trace_buffered_event_ref++)
2170                 return;
2171
2172         for_each_tracing_cpu(cpu) {
2173                 page = alloc_pages_node(cpu_to_node(cpu),
2174                                         GFP_KERNEL | __GFP_NORETRY, 0);
2175                 if (!page)
2176                         goto failed;
2177
2178                 event = page_address(page);
2179                 memset(event, 0, sizeof(*event));
2180
2181                 per_cpu(trace_buffered_event, cpu) = event;
2182
2183                 preempt_disable();
2184                 if (cpu == smp_processor_id() &&
2185                     this_cpu_read(trace_buffered_event) !=
2186                     per_cpu(trace_buffered_event, cpu))
2187                         WARN_ON_ONCE(1);
2188                 preempt_enable();
2189         }
2190
2191         return;
2192  failed:
2193         trace_buffered_event_disable();
2194 }
2195
2196 static void enable_trace_buffered_event(void *data)
2197 {
2198         /* Probably not needed, but do it anyway */
2199         smp_rmb();
2200         this_cpu_dec(trace_buffered_event_cnt);
2201 }
2202
2203 static void disable_trace_buffered_event(void *data)
2204 {
2205         this_cpu_inc(trace_buffered_event_cnt);
2206 }
2207
2208 /**
2209  * trace_buffered_event_disable - disable buffering events
2210  *
2211  * When a filter is removed, it is faster to not use the buffered
2212  * events, and to commit directly into the ring buffer. Free up
2213  * the temp buffers when there are no more users. This requires
2214  * special synchronization with current events.
2215  */
2216 void trace_buffered_event_disable(void)
2217 {
2218         int cpu;
2219
2220         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2221
2222         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2223                 return;
2224
2225         if (--trace_buffered_event_ref)
2226                 return;
2227
2228         preempt_disable();
2229         /* For each CPU, set the buffer as used. */
2230         smp_call_function_many(tracing_buffer_mask,
2231                                disable_trace_buffered_event, NULL, 1);
2232         preempt_enable();
2233
2234         /* Wait for all current users to finish */
2235         synchronize_sched();
2236
2237         for_each_tracing_cpu(cpu) {
2238                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2239                 per_cpu(trace_buffered_event, cpu) = NULL;
2240         }
2241         /*
2242          * Make sure trace_buffered_event is NULL before clearing
2243          * trace_buffered_event_cnt.
2244          */
2245         smp_wmb();
2246
2247         preempt_disable();
2248         /* Do the work on each cpu */
2249         smp_call_function_many(tracing_buffer_mask,
2250                                enable_trace_buffered_event, NULL, 1);
2251         preempt_enable();
2252 }
2253
2254 static struct ring_buffer *temp_buffer;
2255
2256 struct ring_buffer_event *
2257 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2258                           struct trace_event_file *trace_file,
2259                           int type, unsigned long len,
2260                           unsigned long flags, int pc)
2261 {
2262         struct ring_buffer_event *entry;
2263         int val;
2264
2265         *current_rb = trace_file->tr->trace_buffer.buffer;
2266
2267         if ((trace_file->flags &
2268              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2269             (entry = this_cpu_read(trace_buffered_event))) {
2270                 /* Try to use the per cpu buffer first */
2271                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2272                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2273                         trace_event_setup(entry, type, flags, pc);
2274                         entry->array[0] = len;
2275                         return entry;
2276                 }
2277                 this_cpu_dec(trace_buffered_event_cnt);
2278         }
2279
2280         entry = __trace_buffer_lock_reserve(*current_rb,
2281                                             type, len, flags, pc);
2282         /*
2283          * If tracing is off, but we have triggers enabled
2284          * we still need to look at the event data. Use the temp_buffer
2285          * to store the trace event for the tigger to use. It's recusive
2286          * safe and will not be recorded anywhere.
2287          */
2288         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2289                 *current_rb = temp_buffer;
2290                 entry = __trace_buffer_lock_reserve(*current_rb,
2291                                                     type, len, flags, pc);
2292         }
2293         return entry;
2294 }
2295 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2296
2297 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2298 static DEFINE_MUTEX(tracepoint_printk_mutex);
2299
2300 static void output_printk(struct trace_event_buffer *fbuffer)
2301 {
2302         struct trace_event_call *event_call;
2303         struct trace_event *event;
2304         unsigned long flags;
2305         struct trace_iterator *iter = tracepoint_print_iter;
2306
2307         /* We should never get here if iter is NULL */
2308         if (WARN_ON_ONCE(!iter))
2309                 return;
2310
2311         event_call = fbuffer->trace_file->event_call;
2312         if (!event_call || !event_call->event.funcs ||
2313             !event_call->event.funcs->trace)
2314                 return;
2315
2316         event = &fbuffer->trace_file->event_call->event;
2317
2318         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2319         trace_seq_init(&iter->seq);
2320         iter->ent = fbuffer->entry;
2321         event_call->event.funcs->trace(iter, 0, event);
2322         trace_seq_putc(&iter->seq, 0);
2323         printk("%s", iter->seq.buffer);
2324
2325         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2326 }
2327
2328 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2329                              void __user *buffer, size_t *lenp,
2330                              loff_t *ppos)
2331 {
2332         int save_tracepoint_printk;
2333         int ret;
2334
2335         mutex_lock(&tracepoint_printk_mutex);
2336         save_tracepoint_printk = tracepoint_printk;
2337
2338         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2339
2340         /*
2341          * This will force exiting early, as tracepoint_printk
2342          * is always zero when tracepoint_printk_iter is not allocated
2343          */
2344         if (!tracepoint_print_iter)
2345                 tracepoint_printk = 0;
2346
2347         if (save_tracepoint_printk == tracepoint_printk)
2348                 goto out;
2349
2350         if (tracepoint_printk)
2351                 static_key_enable(&tracepoint_printk_key.key);
2352         else
2353                 static_key_disable(&tracepoint_printk_key.key);
2354
2355  out:
2356         mutex_unlock(&tracepoint_printk_mutex);
2357
2358         return ret;
2359 }
2360
2361 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2362 {
2363         if (static_key_false(&tracepoint_printk_key.key))
2364                 output_printk(fbuffer);
2365
2366         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2367                                     fbuffer->event, fbuffer->entry,
2368                                     fbuffer->flags, fbuffer->pc);
2369 }
2370 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2371
2372 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2373                                      struct ring_buffer *buffer,
2374                                      struct ring_buffer_event *event,
2375                                      unsigned long flags, int pc,
2376                                      struct pt_regs *regs)
2377 {
2378         __buffer_unlock_commit(buffer, event);
2379
2380         /*
2381          * If regs is not set, then skip the following callers:
2382          *   trace_buffer_unlock_commit_regs
2383          *   event_trigger_unlock_commit
2384          *   trace_event_buffer_commit
2385          *   trace_event_raw_event_sched_switch
2386          * Note, we can still get here via blktrace, wakeup tracer
2387          * and mmiotrace, but that's ok if they lose a function or
2388          * two. They are that meaningful.
2389          */
2390         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2391         ftrace_trace_userstack(tr, buffer, flags, pc);
2392 }
2393
2394 /*
2395  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2396  */
2397 void
2398 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2399                                    struct ring_buffer_event *event)
2400 {
2401         __buffer_unlock_commit(buffer, event);
2402 }
2403
2404 static void
2405 trace_process_export(struct trace_export *export,
2406                struct ring_buffer_event *event)
2407 {
2408         struct trace_entry *entry;
2409         unsigned int size = 0;
2410
2411         entry = ring_buffer_event_data(event);
2412         size = ring_buffer_event_length(event);
2413         export->write(entry, size);
2414 }
2415
2416 static DEFINE_MUTEX(ftrace_export_lock);
2417
2418 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2419
2420 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2421
2422 static inline void ftrace_exports_enable(void)
2423 {
2424         static_branch_enable(&ftrace_exports_enabled);
2425 }
2426
2427 static inline void ftrace_exports_disable(void)
2428 {
2429         static_branch_disable(&ftrace_exports_enabled);
2430 }
2431
2432 void ftrace_exports(struct ring_buffer_event *event)
2433 {
2434         struct trace_export *export;
2435
2436         preempt_disable_notrace();
2437
2438         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2439         while (export) {
2440                 trace_process_export(export, event);
2441                 export = rcu_dereference_raw_notrace(export->next);
2442         }
2443
2444         preempt_enable_notrace();
2445 }
2446
2447 static inline void
2448 add_trace_export(struct trace_export **list, struct trace_export *export)
2449 {
2450         rcu_assign_pointer(export->next, *list);
2451         /*
2452          * We are entering export into the list but another
2453          * CPU might be walking that list. We need to make sure
2454          * the export->next pointer is valid before another CPU sees
2455          * the export pointer included into the list.
2456          */
2457         rcu_assign_pointer(*list, export);
2458 }
2459
2460 static inline int
2461 rm_trace_export(struct trace_export **list, struct trace_export *export)
2462 {
2463         struct trace_export **p;
2464
2465         for (p = list; *p != NULL; p = &(*p)->next)
2466                 if (*p == export)
2467                         break;
2468
2469         if (*p != export)
2470                 return -1;
2471
2472         rcu_assign_pointer(*p, (*p)->next);
2473
2474         return 0;
2475 }
2476
2477 static inline void
2478 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2479 {
2480         if (*list == NULL)
2481                 ftrace_exports_enable();
2482
2483         add_trace_export(list, export);
2484 }
2485
2486 static inline int
2487 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2488 {
2489         int ret;
2490
2491         ret = rm_trace_export(list, export);
2492         if (*list == NULL)
2493                 ftrace_exports_disable();
2494
2495         return ret;
2496 }
2497
2498 int register_ftrace_export(struct trace_export *export)
2499 {
2500         if (WARN_ON_ONCE(!export->write))
2501                 return -1;
2502
2503         mutex_lock(&ftrace_export_lock);
2504
2505         add_ftrace_export(&ftrace_exports_list, export);
2506
2507         mutex_unlock(&ftrace_export_lock);
2508
2509         return 0;
2510 }
2511 EXPORT_SYMBOL_GPL(register_ftrace_export);
2512
2513 int unregister_ftrace_export(struct trace_export *export)
2514 {
2515         int ret;
2516
2517         mutex_lock(&ftrace_export_lock);
2518
2519         ret = rm_ftrace_export(&ftrace_exports_list, export);
2520
2521         mutex_unlock(&ftrace_export_lock);
2522
2523         return ret;
2524 }
2525 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2526
2527 void
2528 trace_function(struct trace_array *tr,
2529                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2530                int pc)
2531 {
2532         struct trace_event_call *call = &event_function;
2533         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2534         struct ring_buffer_event *event;
2535         struct ftrace_entry *entry;
2536
2537         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2538                                             flags, pc);
2539         if (!event)
2540                 return;
2541         entry   = ring_buffer_event_data(event);
2542         entry->ip                       = ip;
2543         entry->parent_ip                = parent_ip;
2544
2545         if (!call_filter_check_discard(call, entry, buffer, event)) {
2546                 if (static_branch_unlikely(&ftrace_exports_enabled))
2547                         ftrace_exports(event);
2548                 __buffer_unlock_commit(buffer, event);
2549         }
2550 }
2551
2552 #ifdef CONFIG_STACKTRACE
2553
2554 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2555 struct ftrace_stack {
2556         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2557 };
2558
2559 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2560 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2561
2562 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2563                                  unsigned long flags,
2564                                  int skip, int pc, struct pt_regs *regs)
2565 {
2566         struct trace_event_call *call = &event_kernel_stack;
2567         struct ring_buffer_event *event;
2568         struct stack_entry *entry;
2569         struct stack_trace trace;
2570         int use_stack;
2571         int size = FTRACE_STACK_ENTRIES;
2572
2573         trace.nr_entries        = 0;
2574         trace.skip              = skip;
2575
2576         /*
2577          * Add two, for this function and the call to save_stack_trace()
2578          * If regs is set, then these functions will not be in the way.
2579          */
2580         if (!regs)
2581                 trace.skip += 2;
2582
2583         /*
2584          * Since events can happen in NMIs there's no safe way to
2585          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2586          * or NMI comes in, it will just have to use the default
2587          * FTRACE_STACK_SIZE.
2588          */
2589         preempt_disable_notrace();
2590
2591         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2592         /*
2593          * We don't need any atomic variables, just a barrier.
2594          * If an interrupt comes in, we don't care, because it would
2595          * have exited and put the counter back to what we want.
2596          * We just need a barrier to keep gcc from moving things
2597          * around.
2598          */
2599         barrier();
2600         if (use_stack == 1) {
2601                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2602                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2603
2604                 if (regs)
2605                         save_stack_trace_regs(regs, &trace);
2606                 else
2607                         save_stack_trace(&trace);
2608
2609                 if (trace.nr_entries > size)
2610                         size = trace.nr_entries;
2611         } else
2612                 /* From now on, use_stack is a boolean */
2613                 use_stack = 0;
2614
2615         size *= sizeof(unsigned long);
2616
2617         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2618                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2619                                     flags, pc);
2620         if (!event)
2621                 goto out;
2622         entry = ring_buffer_event_data(event);
2623
2624         memset(&entry->caller, 0, size);
2625
2626         if (use_stack)
2627                 memcpy(&entry->caller, trace.entries,
2628                        trace.nr_entries * sizeof(unsigned long));
2629         else {
2630                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2631                 trace.entries           = entry->caller;
2632                 if (regs)
2633                         save_stack_trace_regs(regs, &trace);
2634                 else
2635                         save_stack_trace(&trace);
2636         }
2637
2638         entry->size = trace.nr_entries;
2639
2640         if (!call_filter_check_discard(call, entry, buffer, event))
2641                 __buffer_unlock_commit(buffer, event);
2642
2643  out:
2644         /* Again, don't let gcc optimize things here */
2645         barrier();
2646         __this_cpu_dec(ftrace_stack_reserve);
2647         preempt_enable_notrace();
2648
2649 }
2650
2651 static inline void ftrace_trace_stack(struct trace_array *tr,
2652                                       struct ring_buffer *buffer,
2653                                       unsigned long flags,
2654                                       int skip, int pc, struct pt_regs *regs)
2655 {
2656         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2657                 return;
2658
2659         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2660 }
2661
2662 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2663                    int pc)
2664 {
2665         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2666
2667         if (rcu_is_watching()) {
2668                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2669                 return;
2670         }
2671
2672         /*
2673          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2674          * but if the above rcu_is_watching() failed, then the NMI
2675          * triggered someplace critical, and rcu_irq_enter() should
2676          * not be called from NMI.
2677          */
2678         if (unlikely(in_nmi()))
2679                 return;
2680
2681         /*
2682          * It is possible that a function is being traced in a
2683          * location that RCU is not watching. A call to
2684          * rcu_irq_enter() will make sure that it is, but there's
2685          * a few internal rcu functions that could be traced
2686          * where that wont work either. In those cases, we just
2687          * do nothing.
2688          */
2689         if (unlikely(rcu_irq_enter_disabled()))
2690                 return;
2691
2692         rcu_irq_enter_irqson();
2693         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2694         rcu_irq_exit_irqson();
2695 }
2696
2697 /**
2698  * trace_dump_stack - record a stack back trace in the trace buffer
2699  * @skip: Number of functions to skip (helper handlers)
2700  */
2701 void trace_dump_stack(int skip)
2702 {
2703         unsigned long flags;
2704
2705         if (tracing_disabled || tracing_selftest_running)
2706                 return;
2707
2708         local_save_flags(flags);
2709
2710         /*
2711          * Skip 3 more, seems to get us at the caller of
2712          * this function.
2713          */
2714         skip += 3;
2715         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2716                              flags, skip, preempt_count(), NULL);
2717 }
2718
2719 static DEFINE_PER_CPU(int, user_stack_count);
2720
2721 void
2722 ftrace_trace_userstack(struct trace_array *tr,
2723                        struct ring_buffer *buffer, unsigned long flags, int pc)
2724 {
2725         struct trace_event_call *call = &event_user_stack;
2726         struct ring_buffer_event *event;
2727         struct userstack_entry *entry;
2728         struct stack_trace trace;
2729
2730         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2731                 return;
2732
2733         /*
2734          * NMIs can not handle page faults, even with fix ups.
2735          * The save user stack can (and often does) fault.
2736          */
2737         if (unlikely(in_nmi()))
2738                 return;
2739
2740         /*
2741          * prevent recursion, since the user stack tracing may
2742          * trigger other kernel events.
2743          */
2744         preempt_disable();
2745         if (__this_cpu_read(user_stack_count))
2746                 goto out;
2747
2748         __this_cpu_inc(user_stack_count);
2749
2750         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751                                             sizeof(*entry), flags, pc);
2752         if (!event)
2753                 goto out_drop_count;
2754         entry   = ring_buffer_event_data(event);
2755
2756         entry->tgid             = current->tgid;
2757         memset(&entry->caller, 0, sizeof(entry->caller));
2758
2759         trace.nr_entries        = 0;
2760         trace.max_entries       = FTRACE_STACK_ENTRIES;
2761         trace.skip              = 0;
2762         trace.entries           = entry->caller;
2763
2764         save_stack_trace_user(&trace);
2765         if (!call_filter_check_discard(call, entry, buffer, event))
2766                 __buffer_unlock_commit(buffer, event);
2767
2768  out_drop_count:
2769         __this_cpu_dec(user_stack_count);
2770  out:
2771         preempt_enable();
2772 }
2773
2774 #ifdef UNUSED
2775 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2776 {
2777         ftrace_trace_userstack(tr, flags, preempt_count());
2778 }
2779 #endif /* UNUSED */
2780
2781 #endif /* CONFIG_STACKTRACE */
2782
2783 /* created for use with alloc_percpu */
2784 struct trace_buffer_struct {
2785         int nesting;
2786         char buffer[4][TRACE_BUF_SIZE];
2787 };
2788
2789 static struct trace_buffer_struct *trace_percpu_buffer;
2790
2791 /*
2792  * Thise allows for lockless recording.  If we're nested too deeply, then
2793  * this returns NULL.
2794  */
2795 static char *get_trace_buf(void)
2796 {
2797         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2798
2799         if (!buffer || buffer->nesting >= 4)
2800                 return NULL;
2801
2802         buffer->nesting++;
2803
2804         /* Interrupts must see nesting incremented before we use the buffer */
2805         barrier();
2806         return &buffer->buffer[buffer->nesting - 1][0];
2807 }
2808
2809 static void put_trace_buf(void)
2810 {
2811         /* Don't let the decrement of nesting leak before this */
2812         barrier();
2813         this_cpu_dec(trace_percpu_buffer->nesting);
2814 }
2815
2816 static int alloc_percpu_trace_buffer(void)
2817 {
2818         struct trace_buffer_struct *buffers;
2819
2820         buffers = alloc_percpu(struct trace_buffer_struct);
2821         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2822                 return -ENOMEM;
2823
2824         trace_percpu_buffer = buffers;
2825         return 0;
2826 }
2827
2828 static int buffers_allocated;
2829
2830 void trace_printk_init_buffers(void)
2831 {
2832         if (buffers_allocated)
2833                 return;
2834
2835         if (alloc_percpu_trace_buffer())
2836                 return;
2837
2838         /* trace_printk() is for debug use only. Don't use it in production. */
2839
2840         pr_warn("\n");
2841         pr_warn("**********************************************************\n");
2842         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2843         pr_warn("**                                                      **\n");
2844         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2845         pr_warn("**                                                      **\n");
2846         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2847         pr_warn("** unsafe for production use.                           **\n");
2848         pr_warn("**                                                      **\n");
2849         pr_warn("** If you see this message and you are not debugging    **\n");
2850         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2851         pr_warn("**                                                      **\n");
2852         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2853         pr_warn("**********************************************************\n");
2854
2855         /* Expand the buffers to set size */
2856         tracing_update_buffers();
2857
2858         buffers_allocated = 1;
2859
2860         /*
2861          * trace_printk_init_buffers() can be called by modules.
2862          * If that happens, then we need to start cmdline recording
2863          * directly here. If the global_trace.buffer is already
2864          * allocated here, then this was called by module code.
2865          */
2866         if (global_trace.trace_buffer.buffer)
2867                 tracing_start_cmdline_record();
2868 }
2869
2870 void trace_printk_start_comm(void)
2871 {
2872         /* Start tracing comms if trace printk is set */
2873         if (!buffers_allocated)
2874                 return;
2875         tracing_start_cmdline_record();
2876 }
2877
2878 static void trace_printk_start_stop_comm(int enabled)
2879 {
2880         if (!buffers_allocated)
2881                 return;
2882
2883         if (enabled)
2884                 tracing_start_cmdline_record();
2885         else
2886                 tracing_stop_cmdline_record();
2887 }
2888
2889 /**
2890  * trace_vbprintk - write binary msg to tracing buffer
2891  *
2892  */
2893 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2894 {
2895         struct trace_event_call *call = &event_bprint;
2896         struct ring_buffer_event *event;
2897         struct ring_buffer *buffer;
2898         struct trace_array *tr = &global_trace;
2899         struct bprint_entry *entry;
2900         unsigned long flags;
2901         char *tbuffer;
2902         int len = 0, size, pc;
2903
2904         if (unlikely(tracing_selftest_running || tracing_disabled))
2905                 return 0;
2906
2907         /* Don't pollute graph traces with trace_vprintk internals */
2908         pause_graph_tracing();
2909
2910         pc = preempt_count();
2911         preempt_disable_notrace();
2912
2913         tbuffer = get_trace_buf();
2914         if (!tbuffer) {
2915                 len = 0;
2916                 goto out_nobuffer;
2917         }
2918
2919         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2920
2921         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2922                 goto out;
2923
2924         local_save_flags(flags);
2925         size = sizeof(*entry) + sizeof(u32) * len;
2926         buffer = tr->trace_buffer.buffer;
2927         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2928                                             flags, pc);
2929         if (!event)
2930                 goto out;
2931         entry = ring_buffer_event_data(event);
2932         entry->ip                       = ip;
2933         entry->fmt                      = fmt;
2934
2935         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2936         if (!call_filter_check_discard(call, entry, buffer, event)) {
2937                 __buffer_unlock_commit(buffer, event);
2938                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2939         }
2940
2941 out:
2942         put_trace_buf();
2943
2944 out_nobuffer:
2945         preempt_enable_notrace();
2946         unpause_graph_tracing();
2947
2948         return len;
2949 }
2950 EXPORT_SYMBOL_GPL(trace_vbprintk);
2951
2952 __printf(3, 0)
2953 static int
2954 __trace_array_vprintk(struct ring_buffer *buffer,
2955                       unsigned long ip, const char *fmt, va_list args)
2956 {
2957         struct trace_event_call *call = &event_print;
2958         struct ring_buffer_event *event;
2959         int len = 0, size, pc;
2960         struct print_entry *entry;
2961         unsigned long flags;
2962         char *tbuffer;
2963
2964         if (tracing_disabled || tracing_selftest_running)
2965                 return 0;
2966
2967         /* Don't pollute graph traces with trace_vprintk internals */
2968         pause_graph_tracing();
2969
2970         pc = preempt_count();
2971         preempt_disable_notrace();
2972
2973
2974         tbuffer = get_trace_buf();
2975         if (!tbuffer) {
2976                 len = 0;
2977                 goto out_nobuffer;
2978         }
2979
2980         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2981
2982         local_save_flags(flags);
2983         size = sizeof(*entry) + len + 1;
2984         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2985                                             flags, pc);
2986         if (!event)
2987                 goto out;
2988         entry = ring_buffer_event_data(event);
2989         entry->ip = ip;
2990
2991         memcpy(&entry->buf, tbuffer, len + 1);
2992         if (!call_filter_check_discard(call, entry, buffer, event)) {
2993                 __buffer_unlock_commit(buffer, event);
2994                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2995         }
2996
2997 out:
2998         put_trace_buf();
2999
3000 out_nobuffer:
3001         preempt_enable_notrace();
3002         unpause_graph_tracing();
3003
3004         return len;
3005 }
3006
3007 __printf(3, 0)
3008 int trace_array_vprintk(struct trace_array *tr,
3009                         unsigned long ip, const char *fmt, va_list args)
3010 {
3011         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3012 }
3013
3014 __printf(3, 0)
3015 int trace_array_printk(struct trace_array *tr,
3016                        unsigned long ip, const char *fmt, ...)
3017 {
3018         int ret;
3019         va_list ap;
3020
3021         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3022                 return 0;
3023
3024         if (!tr)
3025                 return -ENOENT;
3026
3027         va_start(ap, fmt);
3028         ret = trace_array_vprintk(tr, ip, fmt, ap);
3029         va_end(ap);
3030         return ret;
3031 }
3032
3033 __printf(3, 4)
3034 int trace_array_printk_buf(struct ring_buffer *buffer,
3035                            unsigned long ip, const char *fmt, ...)
3036 {
3037         int ret;
3038         va_list ap;
3039
3040         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3041                 return 0;
3042
3043         va_start(ap, fmt);
3044         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3045         va_end(ap);
3046         return ret;
3047 }
3048
3049 __printf(2, 0)
3050 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3051 {
3052         return trace_array_vprintk(&global_trace, ip, fmt, args);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_vprintk);
3055
3056 static void trace_iterator_increment(struct trace_iterator *iter)
3057 {
3058         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3059
3060         iter->idx++;
3061         if (buf_iter)
3062                 ring_buffer_read(buf_iter, NULL);
3063 }
3064
3065 static struct trace_entry *
3066 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3067                 unsigned long *lost_events)
3068 {
3069         struct ring_buffer_event *event;
3070         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3071
3072         if (buf_iter)
3073                 event = ring_buffer_iter_peek(buf_iter, ts);
3074         else
3075                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3076                                          lost_events);
3077
3078         if (event) {
3079                 iter->ent_size = ring_buffer_event_length(event);
3080                 return ring_buffer_event_data(event);
3081         }
3082         iter->ent_size = 0;
3083         return NULL;
3084 }
3085
3086 static struct trace_entry *
3087 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3088                   unsigned long *missing_events, u64 *ent_ts)
3089 {
3090         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3091         struct trace_entry *ent, *next = NULL;
3092         unsigned long lost_events = 0, next_lost = 0;
3093         int cpu_file = iter->cpu_file;
3094         u64 next_ts = 0, ts;
3095         int next_cpu = -1;
3096         int next_size = 0;
3097         int cpu;
3098
3099         /*
3100          * If we are in a per_cpu trace file, don't bother by iterating over
3101          * all cpu and peek directly.
3102          */
3103         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3104                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3105                         return NULL;
3106                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3107                 if (ent_cpu)
3108                         *ent_cpu = cpu_file;
3109
3110                 return ent;
3111         }
3112
3113         for_each_tracing_cpu(cpu) {
3114
3115                 if (ring_buffer_empty_cpu(buffer, cpu))
3116                         continue;
3117
3118                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3119
3120                 /*
3121                  * Pick the entry with the smallest timestamp:
3122                  */
3123                 if (ent && (!next || ts < next_ts)) {
3124                         next = ent;
3125                         next_cpu = cpu;
3126                         next_ts = ts;
3127                         next_lost = lost_events;
3128                         next_size = iter->ent_size;
3129                 }
3130         }
3131
3132         iter->ent_size = next_size;
3133
3134         if (ent_cpu)
3135                 *ent_cpu = next_cpu;
3136
3137         if (ent_ts)
3138                 *ent_ts = next_ts;
3139
3140         if (missing_events)
3141                 *missing_events = next_lost;
3142
3143         return next;
3144 }
3145
3146 /* Find the next real entry, without updating the iterator itself */
3147 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3148                                           int *ent_cpu, u64 *ent_ts)
3149 {
3150         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3151 }
3152
3153 /* Find the next real entry, and increment the iterator to the next entry */
3154 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3155 {
3156         iter->ent = __find_next_entry(iter, &iter->cpu,
3157                                       &iter->lost_events, &iter->ts);
3158
3159         if (iter->ent)
3160                 trace_iterator_increment(iter);
3161
3162         return iter->ent ? iter : NULL;
3163 }
3164
3165 static void trace_consume(struct trace_iterator *iter)
3166 {
3167         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3168                             &iter->lost_events);
3169 }
3170
3171 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3172 {
3173         struct trace_iterator *iter = m->private;
3174         int i = (int)*pos;
3175         void *ent;
3176
3177         WARN_ON_ONCE(iter->leftover);
3178
3179         (*pos)++;
3180
3181         /* can't go backwards */
3182         if (iter->idx > i)
3183                 return NULL;
3184
3185         if (iter->idx < 0)
3186                 ent = trace_find_next_entry_inc(iter);
3187         else
3188                 ent = iter;
3189
3190         while (ent && iter->idx < i)
3191                 ent = trace_find_next_entry_inc(iter);
3192
3193         iter->pos = *pos;
3194
3195         return ent;
3196 }
3197
3198 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3199 {
3200         struct ring_buffer_event *event;
3201         struct ring_buffer_iter *buf_iter;
3202         unsigned long entries = 0;
3203         u64 ts;
3204
3205         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3206
3207         buf_iter = trace_buffer_iter(iter, cpu);
3208         if (!buf_iter)
3209                 return;
3210
3211         ring_buffer_iter_reset(buf_iter);
3212
3213         /*
3214          * We could have the case with the max latency tracers
3215          * that a reset never took place on a cpu. This is evident
3216          * by the timestamp being before the start of the buffer.
3217          */
3218         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3219                 if (ts >= iter->trace_buffer->time_start)
3220                         break;
3221                 entries++;
3222                 ring_buffer_read(buf_iter, NULL);
3223         }
3224
3225         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3226 }
3227
3228 /*
3229  * The current tracer is copied to avoid a global locking
3230  * all around.
3231  */
3232 static void *s_start(struct seq_file *m, loff_t *pos)
3233 {
3234         struct trace_iterator *iter = m->private;
3235         struct trace_array *tr = iter->tr;
3236         int cpu_file = iter->cpu_file;
3237         void *p = NULL;
3238         loff_t l = 0;
3239         int cpu;
3240
3241         /*
3242          * copy the tracer to avoid using a global lock all around.
3243          * iter->trace is a copy of current_trace, the pointer to the
3244          * name may be used instead of a strcmp(), as iter->trace->name
3245          * will point to the same string as current_trace->name.
3246          */
3247         mutex_lock(&trace_types_lock);
3248         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3249                 *iter->trace = *tr->current_trace;
3250         mutex_unlock(&trace_types_lock);
3251
3252 #ifdef CONFIG_TRACER_MAX_TRACE
3253         if (iter->snapshot && iter->trace->use_max_tr)
3254                 return ERR_PTR(-EBUSY);
3255 #endif
3256
3257         if (*pos != iter->pos) {
3258                 iter->ent = NULL;
3259                 iter->cpu = 0;
3260                 iter->idx = -1;
3261
3262                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3263                         for_each_tracing_cpu(cpu)
3264                                 tracing_iter_reset(iter, cpu);
3265                 } else
3266                         tracing_iter_reset(iter, cpu_file);
3267
3268                 iter->leftover = 0;
3269                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3270                         ;
3271
3272         } else {
3273                 /*
3274                  * If we overflowed the seq_file before, then we want
3275                  * to just reuse the trace_seq buffer again.
3276                  */
3277                 if (iter->leftover)
3278                         p = iter;
3279                 else {
3280                         l = *pos - 1;
3281                         p = s_next(m, p, &l);
3282                 }
3283         }
3284
3285         trace_event_read_lock();
3286         trace_access_lock(cpu_file);
3287         return p;
3288 }
3289
3290 static void s_stop(struct seq_file *m, void *p)
3291 {
3292         struct trace_iterator *iter = m->private;
3293
3294 #ifdef CONFIG_TRACER_MAX_TRACE
3295         if (iter->snapshot && iter->trace->use_max_tr)
3296                 return;
3297 #endif
3298
3299         trace_access_unlock(iter->cpu_file);
3300         trace_event_read_unlock();
3301 }
3302
3303 static void
3304 get_total_entries(struct trace_buffer *buf,
3305                   unsigned long *total, unsigned long *entries)
3306 {
3307         unsigned long count;
3308         int cpu;
3309
3310         *total = 0;
3311         *entries = 0;
3312
3313         for_each_tracing_cpu(cpu) {
3314                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3315                 /*
3316                  * If this buffer has skipped entries, then we hold all
3317                  * entries for the trace and we need to ignore the
3318                  * ones before the time stamp.
3319                  */
3320                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3321                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3322                         /* total is the same as the entries */
3323                         *total += count;
3324                 } else
3325                         *total += count +
3326                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3327                 *entries += count;
3328         }
3329 }
3330
3331 static void print_lat_help_header(struct seq_file *m)
3332 {
3333         seq_puts(m, "#                  _------=> CPU#            \n"
3334                     "#                 / _-----=> irqs-off        \n"
3335                     "#                | / _----=> need-resched    \n"
3336                     "#                || / _---=> hardirq/softirq \n"
3337                     "#                ||| / _--=> preempt-depth   \n"
3338                     "#                |||| /     delay            \n"
3339                     "#  cmd     pid   ||||| time  |   caller      \n"
3340                     "#     \\   /      |||||  \\    |   /         \n");
3341 }
3342
3343 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3344 {
3345         unsigned long total;
3346         unsigned long entries;
3347
3348         get_total_entries(buf, &total, &entries);
3349         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3350                    entries, total, num_online_cpus());
3351         seq_puts(m, "#\n");
3352 }
3353
3354 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3355                                    unsigned int flags)
3356 {
3357         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3358
3359         print_event_info(buf, m);
3360
3361         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3362         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3363 }
3364
3365 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3366                                        unsigned int flags)
3367 {
3368         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3369         const char tgid_space[] = "          ";
3370         const char space[] = "  ";
3371
3372         print_event_info(buf, m);
3373
3374         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3375                    tgid ? tgid_space : space);
3376         seq_printf(m, "#                          %s / _----=> need-resched\n",
3377                    tgid ? tgid_space : space);
3378         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3379                    tgid ? tgid_space : space);
3380         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3381                    tgid ? tgid_space : space);
3382         seq_printf(m, "#                          %s||| /     delay\n",
3383                    tgid ? tgid_space : space);
3384         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3385                    tgid ? "   TGID   " : space);
3386         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3387                    tgid ? "     |    " : space);
3388 }
3389
3390 void
3391 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3392 {
3393         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3394         struct trace_buffer *buf = iter->trace_buffer;
3395         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3396         struct tracer *type = iter->trace;
3397         unsigned long entries;
3398         unsigned long total;
3399         const char *name = "preemption";
3400
3401         name = type->name;
3402
3403         get_total_entries(buf, &total, &entries);
3404
3405         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3406                    name, UTS_RELEASE);
3407         seq_puts(m, "# -----------------------------------"
3408                  "---------------------------------\n");
3409         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3410                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3411                    nsecs_to_usecs(data->saved_latency),
3412                    entries,
3413                    total,
3414                    buf->cpu,
3415 #if defined(CONFIG_PREEMPT_NONE)
3416                    "server",
3417 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3418                    "desktop",
3419 #elif defined(CONFIG_PREEMPT)
3420                    "preempt",
3421 #else
3422                    "unknown",
3423 #endif
3424                    /* These are reserved for later use */
3425                    0, 0, 0, 0);
3426 #ifdef CONFIG_SMP
3427         seq_printf(m, " #P:%d)\n", num_online_cpus());
3428 #else
3429         seq_puts(m, ")\n");
3430 #endif
3431         seq_puts(m, "#    -----------------\n");
3432         seq_printf(m, "#    | task: %.16s-%d "
3433                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3434                    data->comm, data->pid,
3435                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3436                    data->policy, data->rt_priority);
3437         seq_puts(m, "#    -----------------\n");
3438
3439         if (data->critical_start) {
3440                 seq_puts(m, "#  => started at: ");
3441                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3442                 trace_print_seq(m, &iter->seq);
3443                 seq_puts(m, "\n#  => ended at:   ");
3444                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3445                 trace_print_seq(m, &iter->seq);
3446                 seq_puts(m, "\n#\n");
3447         }
3448
3449         seq_puts(m, "#\n");
3450 }
3451
3452 static void test_cpu_buff_start(struct trace_iterator *iter)
3453 {
3454         struct trace_seq *s = &iter->seq;
3455         struct trace_array *tr = iter->tr;
3456
3457         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3458                 return;
3459
3460         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3461                 return;
3462
3463         if (cpumask_available(iter->started) &&
3464             cpumask_test_cpu(iter->cpu, iter->started))
3465                 return;
3466
3467         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3468                 return;
3469
3470         if (cpumask_available(iter->started))
3471                 cpumask_set_cpu(iter->cpu, iter->started);
3472
3473         /* Don't print started cpu buffer for the first entry of the trace */
3474         if (iter->idx > 1)
3475                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3476                                 iter->cpu);
3477 }
3478
3479 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3480 {
3481         struct trace_array *tr = iter->tr;
3482         struct trace_seq *s = &iter->seq;
3483         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3484         struct trace_entry *entry;
3485         struct trace_event *event;
3486
3487         entry = iter->ent;
3488
3489         test_cpu_buff_start(iter);
3490
3491         event = ftrace_find_event(entry->type);
3492
3493         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3494                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3495                         trace_print_lat_context(iter);
3496                 else
3497                         trace_print_context(iter);
3498         }
3499
3500         if (trace_seq_has_overflowed(s))
3501                 return TRACE_TYPE_PARTIAL_LINE;
3502
3503         if (event)
3504                 return event->funcs->trace(iter, sym_flags, event);
3505
3506         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3507
3508         return trace_handle_return(s);
3509 }
3510
3511 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3512 {
3513         struct trace_array *tr = iter->tr;
3514         struct trace_seq *s = &iter->seq;
3515         struct trace_entry *entry;
3516         struct trace_event *event;
3517
3518         entry = iter->ent;
3519
3520         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3521                 trace_seq_printf(s, "%d %d %llu ",
3522                                  entry->pid, iter->cpu, iter->ts);
3523
3524         if (trace_seq_has_overflowed(s))
3525                 return TRACE_TYPE_PARTIAL_LINE;
3526
3527         event = ftrace_find_event(entry->type);
3528         if (event)
3529                 return event->funcs->raw(iter, 0, event);
3530
3531         trace_seq_printf(s, "%d ?\n", entry->type);
3532
3533         return trace_handle_return(s);
3534 }
3535
3536 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3537 {
3538         struct trace_array *tr = iter->tr;
3539         struct trace_seq *s = &iter->seq;
3540         unsigned char newline = '\n';
3541         struct trace_entry *entry;
3542         struct trace_event *event;
3543
3544         entry = iter->ent;
3545
3546         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3547                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3548                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3549                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3550                 if (trace_seq_has_overflowed(s))
3551                         return TRACE_TYPE_PARTIAL_LINE;
3552         }
3553
3554         event = ftrace_find_event(entry->type);
3555         if (event) {
3556                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3557                 if (ret != TRACE_TYPE_HANDLED)
3558                         return ret;
3559         }
3560
3561         SEQ_PUT_FIELD(s, newline);
3562
3563         return trace_handle_return(s);
3564 }
3565
3566 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3567 {
3568         struct trace_array *tr = iter->tr;
3569         struct trace_seq *s = &iter->seq;
3570         struct trace_entry *entry;
3571         struct trace_event *event;
3572
3573         entry = iter->ent;
3574
3575         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3576                 SEQ_PUT_FIELD(s, entry->pid);
3577                 SEQ_PUT_FIELD(s, iter->cpu);
3578                 SEQ_PUT_FIELD(s, iter->ts);
3579                 if (trace_seq_has_overflowed(s))
3580                         return TRACE_TYPE_PARTIAL_LINE;
3581         }
3582
3583         event = ftrace_find_event(entry->type);
3584         return event ? event->funcs->binary(iter, 0, event) :
3585                 TRACE_TYPE_HANDLED;
3586 }
3587
3588 int trace_empty(struct trace_iterator *iter)
3589 {
3590         struct ring_buffer_iter *buf_iter;
3591         int cpu;
3592
3593         /* If we are looking at one CPU buffer, only check that one */
3594         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3595                 cpu = iter->cpu_file;
3596                 buf_iter = trace_buffer_iter(iter, cpu);
3597                 if (buf_iter) {
3598                         if (!ring_buffer_iter_empty(buf_iter))
3599                                 return 0;
3600                 } else {
3601                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3602                                 return 0;
3603                 }
3604                 return 1;
3605         }
3606
3607         for_each_tracing_cpu(cpu) {
3608                 buf_iter = trace_buffer_iter(iter, cpu);
3609                 if (buf_iter) {
3610                         if (!ring_buffer_iter_empty(buf_iter))
3611                                 return 0;
3612                 } else {
3613                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614                                 return 0;
3615                 }
3616         }
3617
3618         return 1;
3619 }
3620
3621 /*  Called with trace_event_read_lock() held. */
3622 enum print_line_t print_trace_line(struct trace_iterator *iter)
3623 {
3624         struct trace_array *tr = iter->tr;
3625         unsigned long trace_flags = tr->trace_flags;
3626         enum print_line_t ret;
3627
3628         if (iter->lost_events) {
3629                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3630                                  iter->cpu, iter->lost_events);
3631                 if (trace_seq_has_overflowed(&iter->seq))
3632                         return TRACE_TYPE_PARTIAL_LINE;
3633         }
3634
3635         if (iter->trace && iter->trace->print_line) {
3636                 ret = iter->trace->print_line(iter);
3637                 if (ret != TRACE_TYPE_UNHANDLED)
3638                         return ret;
3639         }
3640
3641         if (iter->ent->type == TRACE_BPUTS &&
3642                         trace_flags & TRACE_ITER_PRINTK &&
3643                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3644                 return trace_print_bputs_msg_only(iter);
3645
3646         if (iter->ent->type == TRACE_BPRINT &&
3647                         trace_flags & TRACE_ITER_PRINTK &&
3648                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649                 return trace_print_bprintk_msg_only(iter);
3650
3651         if (iter->ent->type == TRACE_PRINT &&
3652                         trace_flags & TRACE_ITER_PRINTK &&
3653                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654                 return trace_print_printk_msg_only(iter);
3655
3656         if (trace_flags & TRACE_ITER_BIN)
3657                 return print_bin_fmt(iter);
3658
3659         if (trace_flags & TRACE_ITER_HEX)
3660                 return print_hex_fmt(iter);
3661
3662         if (trace_flags & TRACE_ITER_RAW)
3663                 return print_raw_fmt(iter);
3664
3665         return print_trace_fmt(iter);
3666 }
3667
3668 void trace_latency_header(struct seq_file *m)
3669 {
3670         struct trace_iterator *iter = m->private;
3671         struct trace_array *tr = iter->tr;
3672
3673         /* print nothing if the buffers are empty */
3674         if (trace_empty(iter))
3675                 return;
3676
3677         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3678                 print_trace_header(m, iter);
3679
3680         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3681                 print_lat_help_header(m);
3682 }
3683
3684 void trace_default_header(struct seq_file *m)
3685 {
3686         struct trace_iterator *iter = m->private;
3687         struct trace_array *tr = iter->tr;
3688         unsigned long trace_flags = tr->trace_flags;
3689
3690         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3691                 return;
3692
3693         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3694                 /* print nothing if the buffers are empty */
3695                 if (trace_empty(iter))
3696                         return;
3697                 print_trace_header(m, iter);
3698                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3699                         print_lat_help_header(m);
3700         } else {
3701                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3702                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3703                                 print_func_help_header_irq(iter->trace_buffer,
3704                                                            m, trace_flags);
3705                         else
3706                                 print_func_help_header(iter->trace_buffer, m,
3707                                                        trace_flags);
3708                 }
3709         }
3710 }
3711
3712 static void test_ftrace_alive(struct seq_file *m)
3713 {
3714         if (!ftrace_is_dead())
3715                 return;
3716         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3717                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3718 }
3719
3720 #ifdef CONFIG_TRACER_MAX_TRACE
3721 static void show_snapshot_main_help(struct seq_file *m)
3722 {
3723         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3724                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3725                     "#                      Takes a snapshot of the main buffer.\n"
3726                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3727                     "#                      (Doesn't have to be '2' works with any number that\n"
3728                     "#                       is not a '0' or '1')\n");
3729 }
3730
3731 static void show_snapshot_percpu_help(struct seq_file *m)
3732 {
3733         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3734 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3735         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3736                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3737 #else
3738         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3739                     "#                     Must use main snapshot file to allocate.\n");
3740 #endif
3741         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3742                     "#                      (Doesn't have to be '2' works with any number that\n"
3743                     "#                       is not a '0' or '1')\n");
3744 }
3745
3746 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3747 {
3748         if (iter->tr->allocated_snapshot)
3749                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3750         else
3751                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3752
3753         seq_puts(m, "# Snapshot commands:\n");
3754         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3755                 show_snapshot_main_help(m);
3756         else
3757                 show_snapshot_percpu_help(m);
3758 }
3759 #else
3760 /* Should never be called */
3761 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3762 #endif
3763
3764 static int s_show(struct seq_file *m, void *v)
3765 {
3766         struct trace_iterator *iter = v;
3767         int ret;
3768
3769         if (iter->ent == NULL) {
3770                 if (iter->tr) {
3771                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3772                         seq_puts(m, "#\n");
3773                         test_ftrace_alive(m);
3774                 }
3775                 if (iter->snapshot && trace_empty(iter))
3776                         print_snapshot_help(m, iter);
3777                 else if (iter->trace && iter->trace->print_header)
3778                         iter->trace->print_header(m);
3779                 else
3780                         trace_default_header(m);
3781
3782         } else if (iter->leftover) {
3783                 /*
3784                  * If we filled the seq_file buffer earlier, we
3785                  * want to just show it now.
3786                  */
3787                 ret = trace_print_seq(m, &iter->seq);
3788
3789                 /* ret should this time be zero, but you never know */
3790                 iter->leftover = ret;
3791
3792         } else {
3793                 print_trace_line(iter);
3794                 ret = trace_print_seq(m, &iter->seq);
3795                 /*
3796                  * If we overflow the seq_file buffer, then it will
3797                  * ask us for this data again at start up.
3798                  * Use that instead.
3799                  *  ret is 0 if seq_file write succeeded.
3800                  *        -1 otherwise.
3801                  */
3802                 iter->leftover = ret;
3803         }
3804
3805         return 0;
3806 }
3807
3808 /*
3809  * Should be used after trace_array_get(), trace_types_lock
3810  * ensures that i_cdev was already initialized.
3811  */
3812 static inline int tracing_get_cpu(struct inode *inode)
3813 {
3814         if (inode->i_cdev) /* See trace_create_cpu_file() */
3815                 return (long)inode->i_cdev - 1;
3816         return RING_BUFFER_ALL_CPUS;
3817 }
3818
3819 static const struct seq_operations tracer_seq_ops = {
3820         .start          = s_start,
3821         .next           = s_next,
3822         .stop           = s_stop,
3823         .show           = s_show,
3824 };
3825
3826 static struct trace_iterator *
3827 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3828 {
3829         struct trace_array *tr = inode->i_private;
3830         struct trace_iterator *iter;
3831         int cpu;
3832
3833         if (tracing_disabled)
3834                 return ERR_PTR(-ENODEV);
3835
3836         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3837         if (!iter)
3838                 return ERR_PTR(-ENOMEM);
3839
3840         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3841                                     GFP_KERNEL);
3842         if (!iter->buffer_iter)
3843                 goto release;
3844
3845         /*
3846          * We make a copy of the current tracer to avoid concurrent
3847          * changes on it while we are reading.
3848          */
3849         mutex_lock(&trace_types_lock);
3850         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3851         if (!iter->trace)
3852                 goto fail;
3853
3854         *iter->trace = *tr->current_trace;
3855
3856         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3857                 goto fail;
3858
3859         iter->tr = tr;
3860
3861 #ifdef CONFIG_TRACER_MAX_TRACE
3862         /* Currently only the top directory has a snapshot */
3863         if (tr->current_trace->print_max || snapshot)
3864                 iter->trace_buffer = &tr->max_buffer;
3865         else
3866 #endif
3867                 iter->trace_buffer = &tr->trace_buffer;
3868         iter->snapshot = snapshot;
3869         iter->pos = -1;
3870         iter->cpu_file = tracing_get_cpu(inode);
3871         mutex_init(&iter->mutex);
3872
3873         /* Notify the tracer early; before we stop tracing. */
3874         if (iter->trace && iter->trace->open)
3875                 iter->trace->open(iter);
3876
3877         /* Annotate start of buffers if we had overruns */
3878         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3879                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3880
3881         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3882         if (trace_clocks[tr->clock_id].in_ns)
3883                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3884
3885         /* stop the trace while dumping if we are not opening "snapshot" */
3886         if (!iter->snapshot)
3887                 tracing_stop_tr(tr);
3888
3889         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3890                 for_each_tracing_cpu(cpu) {
3891                         iter->buffer_iter[cpu] =
3892                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3893                                                          cpu, GFP_KERNEL);
3894                 }
3895                 ring_buffer_read_prepare_sync();
3896                 for_each_tracing_cpu(cpu) {
3897                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3898                         tracing_iter_reset(iter, cpu);
3899                 }
3900         } else {
3901                 cpu = iter->cpu_file;
3902                 iter->buffer_iter[cpu] =
3903                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3904                                                  cpu, GFP_KERNEL);
3905                 ring_buffer_read_prepare_sync();
3906                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3907                 tracing_iter_reset(iter, cpu);
3908         }
3909
3910         mutex_unlock(&trace_types_lock);
3911
3912         return iter;
3913
3914  fail:
3915         mutex_unlock(&trace_types_lock);
3916         kfree(iter->trace);
3917         kfree(iter->buffer_iter);
3918 release:
3919         seq_release_private(inode, file);
3920         return ERR_PTR(-ENOMEM);
3921 }
3922
3923 int tracing_open_generic(struct inode *inode, struct file *filp)
3924 {
3925         if (tracing_disabled)
3926                 return -ENODEV;
3927
3928         filp->private_data = inode->i_private;
3929         return 0;
3930 }
3931
3932 bool tracing_is_disabled(void)
3933 {
3934         return (tracing_disabled) ? true: false;
3935 }
3936
3937 /*
3938  * Open and update trace_array ref count.
3939  * Must have the current trace_array passed to it.
3940  */
3941 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3942 {
3943         struct trace_array *tr = inode->i_private;
3944
3945         if (tracing_disabled)
3946                 return -ENODEV;
3947
3948         if (trace_array_get(tr) < 0)
3949                 return -ENODEV;
3950
3951         filp->private_data = inode->i_private;
3952
3953         return 0;
3954 }
3955
3956 static int tracing_release(struct inode *inode, struct file *file)
3957 {
3958         struct trace_array *tr = inode->i_private;
3959         struct seq_file *m = file->private_data;
3960         struct trace_iterator *iter;
3961         int cpu;
3962
3963         if (!(file->f_mode & FMODE_READ)) {
3964                 trace_array_put(tr);
3965                 return 0;
3966         }
3967
3968         /* Writes do not use seq_file */
3969         iter = m->private;
3970         mutex_lock(&trace_types_lock);
3971
3972         for_each_tracing_cpu(cpu) {
3973                 if (iter->buffer_iter[cpu])
3974                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3975         }
3976
3977         if (iter->trace && iter->trace->close)
3978                 iter->trace->close(iter);
3979
3980         if (!iter->snapshot)
3981                 /* reenable tracing if it was previously enabled */
3982                 tracing_start_tr(tr);
3983
3984         __trace_array_put(tr);
3985
3986         mutex_unlock(&trace_types_lock);
3987
3988         mutex_destroy(&iter->mutex);
3989         free_cpumask_var(iter->started);
3990         kfree(iter->trace);
3991         kfree(iter->buffer_iter);
3992         seq_release_private(inode, file);
3993
3994         return 0;
3995 }
3996
3997 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3998 {
3999         struct trace_array *tr = inode->i_private;
4000
4001         trace_array_put(tr);
4002         return 0;
4003 }
4004
4005 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4006 {
4007         struct trace_array *tr = inode->i_private;
4008
4009         trace_array_put(tr);
4010
4011         return single_release(inode, file);
4012 }
4013
4014 static int tracing_open(struct inode *inode, struct file *file)
4015 {
4016         struct trace_array *tr = inode->i_private;
4017         struct trace_iterator *iter;
4018         int ret = 0;
4019
4020         if (trace_array_get(tr) < 0)
4021                 return -ENODEV;
4022
4023         /* If this file was open for write, then erase contents */
4024         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4025                 int cpu = tracing_get_cpu(inode);
4026                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4027
4028 #ifdef CONFIG_TRACER_MAX_TRACE
4029                 if (tr->current_trace->print_max)
4030                         trace_buf = &tr->max_buffer;
4031 #endif
4032
4033                 if (cpu == RING_BUFFER_ALL_CPUS)
4034                         tracing_reset_online_cpus(trace_buf);
4035                 else
4036                         tracing_reset(trace_buf, cpu);
4037         }
4038
4039         if (file->f_mode & FMODE_READ) {
4040                 iter = __tracing_open(inode, file, false);
4041                 if (IS_ERR(iter))
4042                         ret = PTR_ERR(iter);
4043                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4044                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4045         }
4046
4047         if (ret < 0)
4048                 trace_array_put(tr);
4049
4050         return ret;
4051 }
4052
4053 /*
4054  * Some tracers are not suitable for instance buffers.
4055  * A tracer is always available for the global array (toplevel)
4056  * or if it explicitly states that it is.
4057  */
4058 static bool
4059 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4060 {
4061         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4062 }
4063
4064 /* Find the next tracer that this trace array may use */
4065 static struct tracer *
4066 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4067 {
4068         while (t && !trace_ok_for_array(t, tr))
4069                 t = t->next;
4070
4071         return t;
4072 }
4073
4074 static void *
4075 t_next(struct seq_file *m, void *v, loff_t *pos)
4076 {
4077         struct trace_array *tr = m->private;
4078         struct tracer *t = v;
4079
4080         (*pos)++;
4081
4082         if (t)
4083                 t = get_tracer_for_array(tr, t->next);
4084
4085         return t;
4086 }
4087
4088 static void *t_start(struct seq_file *m, loff_t *pos)
4089 {
4090         struct trace_array *tr = m->private;
4091         struct tracer *t;
4092         loff_t l = 0;
4093
4094         mutex_lock(&trace_types_lock);
4095
4096         t = get_tracer_for_array(tr, trace_types);
4097         for (; t && l < *pos; t = t_next(m, t, &l))
4098                         ;
4099
4100         return t;
4101 }
4102
4103 static void t_stop(struct seq_file *m, void *p)
4104 {
4105         mutex_unlock(&trace_types_lock);
4106 }
4107
4108 static int t_show(struct seq_file *m, void *v)
4109 {
4110         struct tracer *t = v;
4111
4112         if (!t)
4113                 return 0;
4114
4115         seq_puts(m, t->name);
4116         if (t->next)
4117                 seq_putc(m, ' ');
4118         else
4119                 seq_putc(m, '\n');
4120
4121         return 0;
4122 }
4123
4124 static const struct seq_operations show_traces_seq_ops = {
4125         .start          = t_start,
4126         .next           = t_next,
4127         .stop           = t_stop,
4128         .show           = t_show,
4129 };
4130
4131 static int show_traces_open(struct inode *inode, struct file *file)
4132 {
4133         struct trace_array *tr = inode->i_private;
4134         struct seq_file *m;
4135         int ret;
4136
4137         if (tracing_disabled)
4138                 return -ENODEV;
4139
4140         if (trace_array_get(tr) < 0)
4141                 return -ENODEV;
4142
4143         ret = seq_open(file, &show_traces_seq_ops);
4144         if (ret) {
4145                 trace_array_put(tr);
4146                 return ret;
4147         }
4148
4149         m = file->private_data;
4150         m->private = tr;
4151
4152         return 0;
4153 }
4154
4155 static int show_traces_release(struct inode *inode, struct file *file)
4156 {
4157         struct trace_array *tr = inode->i_private;
4158
4159         trace_array_put(tr);
4160         return seq_release(inode, file);
4161 }
4162
4163 static ssize_t
4164 tracing_write_stub(struct file *filp, const char __user *ubuf,
4165                    size_t count, loff_t *ppos)
4166 {
4167         return count;
4168 }
4169
4170 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4171 {
4172         int ret;
4173
4174         if (file->f_mode & FMODE_READ)
4175                 ret = seq_lseek(file, offset, whence);
4176         else
4177                 file->f_pos = ret = 0;
4178
4179         return ret;
4180 }
4181
4182 static const struct file_operations tracing_fops = {
4183         .open           = tracing_open,
4184         .read           = seq_read,
4185         .write          = tracing_write_stub,
4186         .llseek         = tracing_lseek,
4187         .release        = tracing_release,
4188 };
4189
4190 static const struct file_operations show_traces_fops = {
4191         .open           = show_traces_open,
4192         .read           = seq_read,
4193         .llseek         = seq_lseek,
4194         .release        = show_traces_release,
4195 };
4196
4197 static ssize_t
4198 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4199                      size_t count, loff_t *ppos)
4200 {
4201         struct trace_array *tr = file_inode(filp)->i_private;
4202         char *mask_str;
4203         int len;
4204
4205         len = snprintf(NULL, 0, "%*pb\n",
4206                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4207         mask_str = kmalloc(len, GFP_KERNEL);
4208         if (!mask_str)
4209                 return -ENOMEM;
4210
4211         len = snprintf(mask_str, len, "%*pb\n",
4212                        cpumask_pr_args(tr->tracing_cpumask));
4213         if (len >= count) {
4214                 count = -EINVAL;
4215                 goto out_err;
4216         }
4217         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4218
4219 out_err:
4220         kfree(mask_str);
4221
4222         return count;
4223 }
4224
4225 static ssize_t
4226 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4227                       size_t count, loff_t *ppos)
4228 {
4229         struct trace_array *tr = file_inode(filp)->i_private;
4230         cpumask_var_t tracing_cpumask_new;
4231         int err, cpu;
4232
4233         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4234                 return -ENOMEM;
4235
4236         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4237         if (err)
4238                 goto err_unlock;
4239
4240         local_irq_disable();
4241         arch_spin_lock(&tr->max_lock);
4242         for_each_tracing_cpu(cpu) {
4243                 /*
4244                  * Increase/decrease the disabled counter if we are
4245                  * about to flip a bit in the cpumask:
4246                  */
4247                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4248                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4249                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4250                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4251                 }
4252                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4253                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4254                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4255                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4256                 }
4257         }
4258         arch_spin_unlock(&tr->max_lock);
4259         local_irq_enable();
4260
4261         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4262         free_cpumask_var(tracing_cpumask_new);
4263
4264         return count;
4265
4266 err_unlock:
4267         free_cpumask_var(tracing_cpumask_new);
4268
4269         return err;
4270 }
4271
4272 static const struct file_operations tracing_cpumask_fops = {
4273         .open           = tracing_open_generic_tr,
4274         .read           = tracing_cpumask_read,
4275         .write          = tracing_cpumask_write,
4276         .release        = tracing_release_generic_tr,
4277         .llseek         = generic_file_llseek,
4278 };
4279
4280 static int tracing_trace_options_show(struct seq_file *m, void *v)
4281 {
4282         struct tracer_opt *trace_opts;
4283         struct trace_array *tr = m->private;
4284         u32 tracer_flags;
4285         int i;
4286
4287         mutex_lock(&trace_types_lock);
4288         tracer_flags = tr->current_trace->flags->val;
4289         trace_opts = tr->current_trace->flags->opts;
4290
4291         for (i = 0; trace_options[i]; i++) {
4292                 if (tr->trace_flags & (1 << i))
4293                         seq_printf(m, "%s\n", trace_options[i]);
4294                 else
4295                         seq_printf(m, "no%s\n", trace_options[i]);
4296         }
4297
4298         for (i = 0; trace_opts[i].name; i++) {
4299                 if (tracer_flags & trace_opts[i].bit)
4300                         seq_printf(m, "%s\n", trace_opts[i].name);
4301                 else
4302                         seq_printf(m, "no%s\n", trace_opts[i].name);
4303         }
4304         mutex_unlock(&trace_types_lock);
4305
4306         return 0;
4307 }
4308
4309 static int __set_tracer_option(struct trace_array *tr,
4310                                struct tracer_flags *tracer_flags,
4311                                struct tracer_opt *opts, int neg)
4312 {
4313         struct tracer *trace = tracer_flags->trace;
4314         int ret;
4315
4316         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4317         if (ret)
4318                 return ret;
4319
4320         if (neg)
4321                 tracer_flags->val &= ~opts->bit;
4322         else
4323                 tracer_flags->val |= opts->bit;
4324         return 0;
4325 }
4326
4327 /* Try to assign a tracer specific option */
4328 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4329 {
4330         struct tracer *trace = tr->current_trace;
4331         struct tracer_flags *tracer_flags = trace->flags;
4332         struct tracer_opt *opts = NULL;
4333         int i;
4334
4335         for (i = 0; tracer_flags->opts[i].name; i++) {
4336                 opts = &tracer_flags->opts[i];
4337
4338                 if (strcmp(cmp, opts->name) == 0)
4339                         return __set_tracer_option(tr, trace->flags, opts, neg);
4340         }
4341
4342         return -EINVAL;
4343 }
4344
4345 /* Some tracers require overwrite to stay enabled */
4346 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4347 {
4348         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4349                 return -1;
4350
4351         return 0;
4352 }
4353
4354 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4355 {
4356         if ((mask == TRACE_ITER_RECORD_TGID) ||
4357             (mask == TRACE_ITER_RECORD_CMD))
4358                 lockdep_assert_held(&event_mutex);
4359
4360         /* do nothing if flag is already set */
4361         if (!!(tr->trace_flags & mask) == !!enabled)
4362                 return 0;
4363
4364         /* Give the tracer a chance to approve the change */
4365         if (tr->current_trace->flag_changed)
4366                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4367                         return -EINVAL;
4368
4369         if (enabled)
4370                 tr->trace_flags |= mask;
4371         else
4372                 tr->trace_flags &= ~mask;
4373
4374         if (mask == TRACE_ITER_RECORD_CMD)
4375                 trace_event_enable_cmd_record(enabled);
4376
4377         if (mask == TRACE_ITER_RECORD_TGID) {
4378                 if (!tgid_map)
4379                         tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4380                                            GFP_KERNEL);
4381                 if (!tgid_map) {
4382                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4383                         return -ENOMEM;
4384                 }
4385
4386                 trace_event_enable_tgid_record(enabled);
4387         }
4388
4389         if (mask == TRACE_ITER_EVENT_FORK)
4390                 trace_event_follow_fork(tr, enabled);
4391
4392         if (mask == TRACE_ITER_FUNC_FORK)
4393                 ftrace_pid_follow_fork(tr, enabled);
4394
4395         if (mask == TRACE_ITER_OVERWRITE) {
4396                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4397 #ifdef CONFIG_TRACER_MAX_TRACE
4398                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4399 #endif
4400         }
4401
4402         if (mask == TRACE_ITER_PRINTK) {
4403                 trace_printk_start_stop_comm(enabled);
4404                 trace_printk_control(enabled);
4405         }
4406
4407         return 0;
4408 }
4409
4410 static int trace_set_options(struct trace_array *tr, char *option)
4411 {
4412         char *cmp;
4413         int neg = 0;
4414         int ret = -ENODEV;
4415         int i;
4416         size_t orig_len = strlen(option);
4417
4418         cmp = strstrip(option);
4419
4420         if (strncmp(cmp, "no", 2) == 0) {
4421                 neg = 1;
4422                 cmp += 2;
4423         }
4424
4425         mutex_lock(&event_mutex);
4426         mutex_lock(&trace_types_lock);
4427
4428         for (i = 0; trace_options[i]; i++) {
4429                 if (strcmp(cmp, trace_options[i]) == 0) {
4430                         ret = set_tracer_flag(tr, 1 << i, !neg);
4431                         break;
4432                 }
4433         }
4434
4435         /* If no option could be set, test the specific tracer options */
4436         if (!trace_options[i])
4437                 ret = set_tracer_option(tr, cmp, neg);
4438
4439         mutex_unlock(&trace_types_lock);
4440         mutex_unlock(&event_mutex);
4441
4442         /*
4443          * If the first trailing whitespace is replaced with '\0' by strstrip,
4444          * turn it back into a space.
4445          */
4446         if (orig_len > strlen(option))
4447                 option[strlen(option)] = ' ';
4448
4449         return ret;
4450 }
4451
4452 static void __init apply_trace_boot_options(void)
4453 {
4454         char *buf = trace_boot_options_buf;
4455         char *option;
4456
4457         while (true) {
4458                 option = strsep(&buf, ",");
4459
4460                 if (!option)
4461                         break;
4462
4463                 if (*option)
4464                         trace_set_options(&global_trace, option);
4465
4466                 /* Put back the comma to allow this to be called again */
4467                 if (buf)
4468                         *(buf - 1) = ',';
4469         }
4470 }
4471
4472 static ssize_t
4473 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4474                         size_t cnt, loff_t *ppos)
4475 {
4476         struct seq_file *m = filp->private_data;
4477         struct trace_array *tr = m->private;
4478         char buf[64];
4479         int ret;
4480
4481         if (cnt >= sizeof(buf))
4482                 return -EINVAL;
4483
4484         if (copy_from_user(buf, ubuf, cnt))
4485                 return -EFAULT;
4486
4487         buf[cnt] = 0;
4488
4489         ret = trace_set_options(tr, buf);
4490         if (ret < 0)
4491                 return ret;
4492
4493         *ppos += cnt;
4494
4495         return cnt;
4496 }
4497
4498 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4499 {
4500         struct trace_array *tr = inode->i_private;
4501         int ret;
4502
4503         if (tracing_disabled)
4504                 return -ENODEV;
4505
4506         if (trace_array_get(tr) < 0)
4507                 return -ENODEV;
4508
4509         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4510         if (ret < 0)
4511                 trace_array_put(tr);
4512
4513         return ret;
4514 }
4515
4516 static const struct file_operations tracing_iter_fops = {
4517         .open           = tracing_trace_options_open,
4518         .read           = seq_read,
4519         .llseek         = seq_lseek,
4520         .release        = tracing_single_release_tr,
4521         .write          = tracing_trace_options_write,
4522 };
4523
4524 static const char readme_msg[] =
4525         "tracing mini-HOWTO:\n\n"
4526         "# echo 0 > tracing_on : quick way to disable tracing\n"
4527         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4528         " Important files:\n"
4529         "  trace\t\t\t- The static contents of the buffer\n"
4530         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4531         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4532         "  current_tracer\t- function and latency tracers\n"
4533         "  available_tracers\t- list of configured tracers for current_tracer\n"
4534         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4535         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4536         "  trace_clock\t\t-change the clock used to order events\n"
4537         "       local:   Per cpu clock but may not be synced across CPUs\n"
4538         "      global:   Synced across CPUs but slows tracing down.\n"
4539         "     counter:   Not a clock, but just an increment\n"
4540         "      uptime:   Jiffy counter from time of boot\n"
4541         "        perf:   Same clock that perf events use\n"
4542 #ifdef CONFIG_X86_64
4543         "     x86-tsc:   TSC cycle counter\n"
4544 #endif
4545         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4546         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4547         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4548         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4549         "\t\t\t  Remove sub-buffer with rmdir\n"
4550         "  trace_options\t\t- Set format or modify how tracing happens\n"
4551         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4552         "\t\t\t  option name\n"
4553         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4554 #ifdef CONFIG_DYNAMIC_FTRACE
4555         "\n  available_filter_functions - list of functions that can be filtered on\n"
4556         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4557         "\t\t\t  functions\n"
4558         "\t     accepts: func_full_name or glob-matching-pattern\n"
4559         "\t     modules: Can select a group via module\n"
4560         "\t      Format: :mod:<module-name>\n"
4561         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4562         "\t    triggers: a command to perform when function is hit\n"
4563         "\t      Format: <function>:<trigger>[:count]\n"
4564         "\t     trigger: traceon, traceoff\n"
4565         "\t\t      enable_event:<system>:<event>\n"
4566         "\t\t      disable_event:<system>:<event>\n"
4567 #ifdef CONFIG_STACKTRACE
4568         "\t\t      stacktrace\n"
4569 #endif
4570 #ifdef CONFIG_TRACER_SNAPSHOT
4571         "\t\t      snapshot\n"
4572 #endif
4573         "\t\t      dump\n"
4574         "\t\t      cpudump\n"
4575         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4576         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4577         "\t     The first one will disable tracing every time do_fault is hit\n"
4578         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4579         "\t       The first time do trap is hit and it disables tracing, the\n"
4580         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4581         "\t       the counter will not decrement. It only decrements when the\n"
4582         "\t       trigger did work\n"
4583         "\t     To remove trigger without count:\n"
4584         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4585         "\t     To remove trigger with a count:\n"
4586         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4587         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4588         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4589         "\t    modules: Can select a group via module command :mod:\n"
4590         "\t    Does not accept triggers\n"
4591 #endif /* CONFIG_DYNAMIC_FTRACE */
4592 #ifdef CONFIG_FUNCTION_TRACER
4593         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4594         "\t\t    (function)\n"
4595 #endif
4596 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4597         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4598         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4599         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4600 #endif
4601 #ifdef CONFIG_TRACER_SNAPSHOT
4602         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4603         "\t\t\t  snapshot buffer. Read the contents for more\n"
4604         "\t\t\t  information\n"
4605 #endif
4606 #ifdef CONFIG_STACK_TRACER
4607         "  stack_trace\t\t- Shows the max stack trace when active\n"
4608         "  stack_max_size\t- Shows current max stack size that was traced\n"
4609         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4610         "\t\t\t  new trace)\n"
4611 #ifdef CONFIG_DYNAMIC_FTRACE
4612         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4613         "\t\t\t  traces\n"
4614 #endif
4615 #endif /* CONFIG_STACK_TRACER */
4616 #ifdef CONFIG_KPROBE_EVENTS
4617         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4618         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4619 #endif
4620 #ifdef CONFIG_UPROBE_EVENTS
4621         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4622         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4623 #endif
4624 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4625         "\t  accepts: event-definitions (one definition per line)\n"
4626         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4627         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4628         "\t           -:[<group>/]<event>\n"
4629 #ifdef CONFIG_KPROBE_EVENTS
4630         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4631   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4632 #endif
4633 #ifdef CONFIG_UPROBE_EVENTS
4634         "\t    place: <path>:<offset>\n"
4635 #endif
4636         "\t     args: <name>=fetcharg[:type]\n"
4637         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4638         "\t           $stack<index>, $stack, $retval, $comm\n"
4639         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4640         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4641 #endif
4642         "  events/\t\t- Directory containing all trace event subsystems:\n"
4643         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4644         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4645         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4646         "\t\t\t  events\n"
4647         "      filter\t\t- If set, only events passing filter are traced\n"
4648         "  events/<system>/<event>/\t- Directory containing control files for\n"
4649         "\t\t\t  <event>:\n"
4650         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4651         "      filter\t\t- If set, only events passing filter are traced\n"
4652         "      trigger\t\t- If set, a command to perform when event is hit\n"
4653         "\t    Format: <trigger>[:count][if <filter>]\n"
4654         "\t   trigger: traceon, traceoff\n"
4655         "\t            enable_event:<system>:<event>\n"
4656         "\t            disable_event:<system>:<event>\n"
4657 #ifdef CONFIG_HIST_TRIGGERS
4658         "\t            enable_hist:<system>:<event>\n"
4659         "\t            disable_hist:<system>:<event>\n"
4660 #endif
4661 #ifdef CONFIG_STACKTRACE
4662         "\t\t    stacktrace\n"
4663 #endif
4664 #ifdef CONFIG_TRACER_SNAPSHOT
4665         "\t\t    snapshot\n"
4666 #endif
4667 #ifdef CONFIG_HIST_TRIGGERS
4668         "\t\t    hist (see below)\n"
4669 #endif
4670         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4671         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4672         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4673         "\t                  events/block/block_unplug/trigger\n"
4674         "\t   The first disables tracing every time block_unplug is hit.\n"
4675         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4676         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4677         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4678         "\t   Like function triggers, the counter is only decremented if it\n"
4679         "\t    enabled or disabled tracing.\n"
4680         "\t   To remove a trigger without a count:\n"
4681         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4682         "\t   To remove a trigger with a count:\n"
4683         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4684         "\t   Filters can be ignored when removing a trigger.\n"
4685 #ifdef CONFIG_HIST_TRIGGERS
4686         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4687         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4688         "\t            [:values=<field1[,field2,...]>]\n"
4689         "\t            [:sort=<field1[,field2,...]>]\n"
4690         "\t            [:size=#entries]\n"
4691         "\t            [:pause][:continue][:clear]\n"
4692         "\t            [:name=histname1]\n"
4693         "\t            [if <filter>]\n\n"
4694         "\t    When a matching event is hit, an entry is added to a hash\n"
4695         "\t    table using the key(s) and value(s) named, and the value of a\n"
4696         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4697         "\t    correspond to fields in the event's format description.  Keys\n"
4698         "\t    can be any field, or the special string 'stacktrace'.\n"
4699         "\t    Compound keys consisting of up to two fields can be specified\n"
4700         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4701         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4702         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4703         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4704         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4705         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4706         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4707         "\t    its histogram data will be shared with other triggers of the\n"
4708         "\t    same name, and trigger hits will update this common data.\n\n"
4709         "\t    Reading the 'hist' file for the event will dump the hash\n"
4710         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4711         "\t    triggers attached to an event, there will be a table for each\n"
4712         "\t    trigger in the output.  The table displayed for a named\n"
4713         "\t    trigger will be the same as any other instance having the\n"
4714         "\t    same name.  The default format used to display a given field\n"
4715         "\t    can be modified by appending any of the following modifiers\n"
4716         "\t    to the field name, as applicable:\n\n"
4717         "\t            .hex        display a number as a hex value\n"
4718         "\t            .sym        display an address as a symbol\n"
4719         "\t            .sym-offset display an address as a symbol and offset\n"
4720         "\t            .execname   display a common_pid as a program name\n"
4721         "\t            .syscall    display a syscall id as a syscall name\n\n"
4722         "\t            .log2       display log2 value rather than raw number\n\n"
4723         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4724         "\t    trigger or to start a hist trigger but not log any events\n"
4725         "\t    until told to do so.  'continue' can be used to start or\n"
4726         "\t    restart a paused hist trigger.\n\n"
4727         "\t    The 'clear' parameter will clear the contents of a running\n"
4728         "\t    hist trigger and leave its current paused/active state\n"
4729         "\t    unchanged.\n\n"
4730         "\t    The enable_hist and disable_hist triggers can be used to\n"
4731         "\t    have one event conditionally start and stop another event's\n"
4732         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4733         "\t    the enable_event and disable_event triggers.\n"
4734 #endif
4735 ;
4736
4737 static ssize_t
4738 tracing_readme_read(struct file *filp, char __user *ubuf,
4739                        size_t cnt, loff_t *ppos)
4740 {
4741         return simple_read_from_buffer(ubuf, cnt, ppos,
4742                                         readme_msg, strlen(readme_msg));
4743 }
4744
4745 static const struct file_operations tracing_readme_fops = {
4746         .open           = tracing_open_generic,
4747         .read           = tracing_readme_read,
4748         .llseek         = generic_file_llseek,
4749 };
4750
4751 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4752 {
4753         int pid = ++(*pos);
4754
4755         if (pid > PID_MAX_DEFAULT)
4756                 return NULL;
4757
4758         return &tgid_map[pid];
4759 }
4760
4761 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4762 {
4763         if (!tgid_map || *pos > PID_MAX_DEFAULT)
4764                 return NULL;
4765
4766         return &tgid_map[*pos];
4767 }
4768
4769 static void saved_tgids_stop(struct seq_file *m, void *v)
4770 {
4771 }
4772
4773 static int saved_tgids_show(struct seq_file *m, void *v)
4774 {
4775         int *entry = (int *)v;
4776         int pid = entry - tgid_map;
4777         int tgid = *entry;
4778
4779         if (tgid == 0)
4780                 return SEQ_SKIP;
4781
4782         seq_printf(m, "%d %d\n", pid, tgid);
4783         return 0;
4784 }
4785
4786 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4787         .start          = saved_tgids_start,
4788         .stop           = saved_tgids_stop,
4789         .next           = saved_tgids_next,
4790         .show           = saved_tgids_show,
4791 };
4792
4793 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4794 {
4795         if (tracing_disabled)
4796                 return -ENODEV;
4797
4798         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4799 }
4800
4801
4802 static const struct file_operations tracing_saved_tgids_fops = {
4803         .open           = tracing_saved_tgids_open,
4804         .read           = seq_read,
4805         .llseek         = seq_lseek,
4806         .release        = seq_release,
4807 };
4808
4809 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4810 {
4811         unsigned int *ptr = v;
4812
4813         if (*pos || m->count)
4814                 ptr++;
4815
4816         (*pos)++;
4817
4818         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4819              ptr++) {
4820                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4821                         continue;
4822
4823                 return ptr;
4824         }
4825
4826         return NULL;
4827 }
4828
4829 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4830 {
4831         void *v;
4832         loff_t l = 0;
4833
4834         preempt_disable();
4835         arch_spin_lock(&trace_cmdline_lock);
4836
4837         v = &savedcmd->map_cmdline_to_pid[0];
4838         while (l <= *pos) {
4839                 v = saved_cmdlines_next(m, v, &l);
4840                 if (!v)
4841                         return NULL;
4842         }
4843
4844         return v;
4845 }
4846
4847 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4848 {
4849         arch_spin_unlock(&trace_cmdline_lock);
4850         preempt_enable();
4851 }
4852
4853 static int saved_cmdlines_show(struct seq_file *m, void *v)
4854 {
4855         char buf[TASK_COMM_LEN];
4856         unsigned int *pid = v;
4857
4858         __trace_find_cmdline(*pid, buf);
4859         seq_printf(m, "%d %s\n", *pid, buf);
4860         return 0;
4861 }
4862
4863 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4864         .start          = saved_cmdlines_start,
4865         .next           = saved_cmdlines_next,
4866         .stop           = saved_cmdlines_stop,
4867         .show           = saved_cmdlines_show,
4868 };
4869
4870 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4871 {
4872         if (tracing_disabled)
4873                 return -ENODEV;
4874
4875         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4876 }
4877
4878 static const struct file_operations tracing_saved_cmdlines_fops = {
4879         .open           = tracing_saved_cmdlines_open,
4880         .read           = seq_read,
4881         .llseek         = seq_lseek,
4882         .release        = seq_release,
4883 };
4884
4885 static ssize_t
4886 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4887                                  size_t cnt, loff_t *ppos)
4888 {
4889         char buf[64];
4890         int r;
4891
4892         arch_spin_lock(&trace_cmdline_lock);
4893         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4894         arch_spin_unlock(&trace_cmdline_lock);
4895
4896         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4897 }
4898
4899 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4900 {
4901         kfree(s->saved_cmdlines);
4902         kfree(s->map_cmdline_to_pid);
4903         kfree(s);
4904 }
4905
4906 static int tracing_resize_saved_cmdlines(unsigned int val)
4907 {
4908         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4909
4910         s = kmalloc(sizeof(*s), GFP_KERNEL);
4911         if (!s)
4912                 return -ENOMEM;
4913
4914         if (allocate_cmdlines_buffer(val, s) < 0) {
4915                 kfree(s);
4916                 return -ENOMEM;
4917         }
4918
4919         arch_spin_lock(&trace_cmdline_lock);
4920         savedcmd_temp = savedcmd;
4921         savedcmd = s;
4922         arch_spin_unlock(&trace_cmdline_lock);
4923         free_saved_cmdlines_buffer(savedcmd_temp);
4924
4925         return 0;
4926 }
4927
4928 static ssize_t
4929 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4930                                   size_t cnt, loff_t *ppos)
4931 {
4932         unsigned long val;
4933         int ret;
4934
4935         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4936         if (ret)
4937                 return ret;
4938
4939         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4940         if (!val || val > PID_MAX_DEFAULT)
4941                 return -EINVAL;
4942
4943         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4944         if (ret < 0)
4945                 return ret;
4946
4947         *ppos += cnt;
4948
4949         return cnt;
4950 }
4951
4952 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4953         .open           = tracing_open_generic,
4954         .read           = tracing_saved_cmdlines_size_read,
4955         .write          = tracing_saved_cmdlines_size_write,
4956 };
4957
4958 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4959 static union trace_eval_map_item *
4960 update_eval_map(union trace_eval_map_item *ptr)
4961 {
4962         if (!ptr->map.eval_string) {
4963                 if (ptr->tail.next) {
4964                         ptr = ptr->tail.next;
4965                         /* Set ptr to the next real item (skip head) */
4966                         ptr++;
4967                 } else
4968                         return NULL;
4969         }
4970         return ptr;
4971 }
4972
4973 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4974 {
4975         union trace_eval_map_item *ptr = v;
4976
4977         /*
4978          * Paranoid! If ptr points to end, we don't want to increment past it.
4979          * This really should never happen.
4980          */
4981         ptr = update_eval_map(ptr);
4982         if (WARN_ON_ONCE(!ptr))
4983                 return NULL;
4984
4985         ptr++;
4986
4987         (*pos)++;
4988
4989         ptr = update_eval_map(ptr);
4990
4991         return ptr;
4992 }
4993
4994 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4995 {
4996         union trace_eval_map_item *v;
4997         loff_t l = 0;
4998
4999         mutex_lock(&trace_eval_mutex);
5000
5001         v = trace_eval_maps;
5002         if (v)
5003                 v++;
5004
5005         while (v && l < *pos) {
5006                 v = eval_map_next(m, v, &l);
5007         }
5008
5009         return v;
5010 }
5011
5012 static void eval_map_stop(struct seq_file *m, void *v)
5013 {
5014         mutex_unlock(&trace_eval_mutex);
5015 }
5016
5017 static int eval_map_show(struct seq_file *m, void *v)
5018 {
5019         union trace_eval_map_item *ptr = v;
5020
5021         seq_printf(m, "%s %ld (%s)\n",
5022                    ptr->map.eval_string, ptr->map.eval_value,
5023                    ptr->map.system);
5024
5025         return 0;
5026 }
5027
5028 static const struct seq_operations tracing_eval_map_seq_ops = {
5029         .start          = eval_map_start,
5030         .next           = eval_map_next,
5031         .stop           = eval_map_stop,
5032         .show           = eval_map_show,
5033 };
5034
5035 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5036 {
5037         if (tracing_disabled)
5038                 return -ENODEV;
5039
5040         return seq_open(filp, &tracing_eval_map_seq_ops);
5041 }
5042
5043 static const struct file_operations tracing_eval_map_fops = {
5044         .open           = tracing_eval_map_open,
5045         .read           = seq_read,
5046         .llseek         = seq_lseek,
5047         .release        = seq_release,
5048 };
5049
5050 static inline union trace_eval_map_item *
5051 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5052 {
5053         /* Return tail of array given the head */
5054         return ptr + ptr->head.length + 1;
5055 }
5056
5057 static void
5058 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5059                            int len)
5060 {
5061         struct trace_eval_map **stop;
5062         struct trace_eval_map **map;
5063         union trace_eval_map_item *map_array;
5064         union trace_eval_map_item *ptr;
5065
5066         stop = start + len;
5067
5068         /*
5069          * The trace_eval_maps contains the map plus a head and tail item,
5070          * where the head holds the module and length of array, and the
5071          * tail holds a pointer to the next list.
5072          */
5073         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5074         if (!map_array) {
5075                 pr_warn("Unable to allocate trace eval mapping\n");
5076                 return;
5077         }
5078
5079         mutex_lock(&trace_eval_mutex);
5080
5081         if (!trace_eval_maps)
5082                 trace_eval_maps = map_array;
5083         else {
5084                 ptr = trace_eval_maps;
5085                 for (;;) {
5086                         ptr = trace_eval_jmp_to_tail(ptr);
5087                         if (!ptr->tail.next)
5088                                 break;
5089                         ptr = ptr->tail.next;
5090
5091                 }
5092                 ptr->tail.next = map_array;
5093         }
5094         map_array->head.mod = mod;
5095         map_array->head.length = len;
5096         map_array++;
5097
5098         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5099                 map_array->map = **map;
5100                 map_array++;
5101         }
5102         memset(map_array, 0, sizeof(*map_array));
5103
5104         mutex_unlock(&trace_eval_mutex);
5105 }
5106
5107 static void trace_create_eval_file(struct dentry *d_tracer)
5108 {
5109         trace_create_file("eval_map", 0444, d_tracer,
5110                           NULL, &tracing_eval_map_fops);
5111 }
5112
5113 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5114 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5115 static inline void trace_insert_eval_map_file(struct module *mod,
5116                               struct trace_eval_map **start, int len) { }
5117 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5118
5119 static void trace_insert_eval_map(struct module *mod,
5120                                   struct trace_eval_map **start, int len)
5121 {
5122         struct trace_eval_map **map;
5123
5124         if (len <= 0)
5125                 return;
5126
5127         map = start;
5128
5129         trace_event_eval_update(map, len);
5130
5131         trace_insert_eval_map_file(mod, start, len);
5132 }
5133
5134 static ssize_t
5135 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5136                        size_t cnt, loff_t *ppos)
5137 {
5138         struct trace_array *tr = filp->private_data;
5139         char buf[MAX_TRACER_SIZE+2];
5140         int r;
5141
5142         mutex_lock(&trace_types_lock);
5143         r = sprintf(buf, "%s\n", tr->current_trace->name);
5144         mutex_unlock(&trace_types_lock);
5145
5146         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5147 }
5148
5149 int tracer_init(struct tracer *t, struct trace_array *tr)
5150 {
5151         tracing_reset_online_cpus(&tr->trace_buffer);
5152         return t->init(tr);
5153 }
5154
5155 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5156 {
5157         int cpu;
5158
5159         for_each_tracing_cpu(cpu)
5160                 per_cpu_ptr(buf->data, cpu)->entries = val;
5161 }
5162
5163 #ifdef CONFIG_TRACER_MAX_TRACE
5164 /* resize @tr's buffer to the size of @size_tr's entries */
5165 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5166                                         struct trace_buffer *size_buf, int cpu_id)
5167 {
5168         int cpu, ret = 0;
5169
5170         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5171                 for_each_tracing_cpu(cpu) {
5172                         ret = ring_buffer_resize(trace_buf->buffer,
5173                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5174                         if (ret < 0)
5175                                 break;
5176                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5177                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5178                 }
5179         } else {
5180                 ret = ring_buffer_resize(trace_buf->buffer,
5181                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5182                 if (ret == 0)
5183                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5184                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5185         }
5186
5187         return ret;
5188 }
5189 #endif /* CONFIG_TRACER_MAX_TRACE */
5190
5191 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5192                                         unsigned long size, int cpu)
5193 {
5194         int ret;
5195
5196         /*
5197          * If kernel or user changes the size of the ring buffer
5198          * we use the size that was given, and we can forget about
5199          * expanding it later.
5200          */
5201         ring_buffer_expanded = true;
5202
5203         /* May be called before buffers are initialized */
5204         if (!tr->trace_buffer.buffer)
5205                 return 0;
5206
5207         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5208         if (ret < 0)
5209                 return ret;
5210
5211 #ifdef CONFIG_TRACER_MAX_TRACE
5212         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5213             !tr->current_trace->use_max_tr)
5214                 goto out;
5215
5216         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5217         if (ret < 0) {
5218                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5219                                                      &tr->trace_buffer, cpu);
5220                 if (r < 0) {
5221                         /*
5222                          * AARGH! We are left with different
5223                          * size max buffer!!!!
5224                          * The max buffer is our "snapshot" buffer.
5225                          * When a tracer needs a snapshot (one of the
5226                          * latency tracers), it swaps the max buffer
5227                          * with the saved snap shot. We succeeded to
5228                          * update the size of the main buffer, but failed to
5229                          * update the size of the max buffer. But when we tried
5230                          * to reset the main buffer to the original size, we
5231                          * failed there too. This is very unlikely to
5232                          * happen, but if it does, warn and kill all
5233                          * tracing.
5234                          */
5235                         WARN_ON(1);
5236                         tracing_disabled = 1;
5237                 }
5238                 return ret;
5239         }
5240
5241         if (cpu == RING_BUFFER_ALL_CPUS)
5242                 set_buffer_entries(&tr->max_buffer, size);
5243         else
5244                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5245
5246  out:
5247 #endif /* CONFIG_TRACER_MAX_TRACE */
5248
5249         if (cpu == RING_BUFFER_ALL_CPUS)
5250                 set_buffer_entries(&tr->trace_buffer, size);
5251         else
5252                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5253
5254         return ret;
5255 }
5256
5257 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5258                                           unsigned long size, int cpu_id)
5259 {
5260         int ret = size;
5261
5262         mutex_lock(&trace_types_lock);
5263
5264         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5265                 /* make sure, this cpu is enabled in the mask */
5266                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5267                         ret = -EINVAL;
5268                         goto out;
5269                 }
5270         }
5271
5272         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5273         if (ret < 0)
5274                 ret = -ENOMEM;
5275
5276 out:
5277         mutex_unlock(&trace_types_lock);
5278
5279         return ret;
5280 }
5281
5282
5283 /**
5284  * tracing_update_buffers - used by tracing facility to expand ring buffers
5285  *
5286  * To save on memory when the tracing is never used on a system with it
5287  * configured in. The ring buffers are set to a minimum size. But once
5288  * a user starts to use the tracing facility, then they need to grow
5289  * to their default size.
5290  *
5291  * This function is to be called when a tracer is about to be used.
5292  */
5293 int tracing_update_buffers(void)
5294 {
5295         int ret = 0;
5296
5297         mutex_lock(&trace_types_lock);
5298         if (!ring_buffer_expanded)
5299                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5300                                                 RING_BUFFER_ALL_CPUS);
5301         mutex_unlock(&trace_types_lock);
5302
5303         return ret;
5304 }
5305
5306 struct trace_option_dentry;
5307
5308 static void
5309 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5310
5311 /*
5312  * Used to clear out the tracer before deletion of an instance.
5313  * Must have trace_types_lock held.
5314  */
5315 static void tracing_set_nop(struct trace_array *tr)
5316 {
5317         if (tr->current_trace == &nop_trace)
5318                 return;
5319         
5320         tr->current_trace->enabled--;
5321
5322         if (tr->current_trace->reset)
5323                 tr->current_trace->reset(tr);
5324
5325         tr->current_trace = &nop_trace;
5326 }
5327
5328 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5329 {
5330         /* Only enable if the directory has been created already. */
5331         if (!tr->dir)
5332                 return;
5333
5334         create_trace_option_files(tr, t);
5335 }
5336
5337 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5338 {
5339         struct tracer *t;
5340 #ifdef CONFIG_TRACER_MAX_TRACE
5341         bool had_max_tr;
5342 #endif
5343         int ret = 0;
5344
5345         mutex_lock(&trace_types_lock);
5346
5347         if (!ring_buffer_expanded) {
5348                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5349                                                 RING_BUFFER_ALL_CPUS);
5350                 if (ret < 0)
5351                         goto out;
5352                 ret = 0;
5353         }
5354
5355         for (t = trace_types; t; t = t->next) {
5356                 if (strcmp(t->name, buf) == 0)
5357                         break;
5358         }
5359         if (!t) {
5360                 ret = -EINVAL;
5361                 goto out;
5362         }
5363         if (t == tr->current_trace)
5364                 goto out;
5365
5366         /* Some tracers won't work on kernel command line */
5367         if (system_state < SYSTEM_RUNNING && t->noboot) {
5368                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5369                         t->name);
5370                 goto out;
5371         }
5372
5373         /* Some tracers are only allowed for the top level buffer */
5374         if (!trace_ok_for_array(t, tr)) {
5375                 ret = -EINVAL;
5376                 goto out;
5377         }
5378
5379         /* If trace pipe files are being read, we can't change the tracer */
5380         if (tr->current_trace->ref) {
5381                 ret = -EBUSY;
5382                 goto out;
5383         }
5384
5385         trace_branch_disable();
5386
5387         tr->current_trace->enabled--;
5388
5389         if (tr->current_trace->reset)
5390                 tr->current_trace->reset(tr);
5391
5392         /* Current trace needs to be nop_trace before synchronize_sched */
5393         tr->current_trace = &nop_trace;
5394
5395 #ifdef CONFIG_TRACER_MAX_TRACE
5396         had_max_tr = tr->allocated_snapshot;
5397
5398         if (had_max_tr && !t->use_max_tr) {
5399                 /*
5400                  * We need to make sure that the update_max_tr sees that
5401                  * current_trace changed to nop_trace to keep it from
5402                  * swapping the buffers after we resize it.
5403                  * The update_max_tr is called from interrupts disabled
5404                  * so a synchronized_sched() is sufficient.
5405                  */
5406                 synchronize_sched();
5407                 free_snapshot(tr);
5408         }
5409 #endif
5410
5411 #ifdef CONFIG_TRACER_MAX_TRACE
5412         if (t->use_max_tr && !had_max_tr) {
5413                 ret = tracing_alloc_snapshot_instance(tr);
5414                 if (ret < 0)
5415                         goto out;
5416         }
5417 #endif
5418
5419         if (t->init) {
5420                 ret = tracer_init(t, tr);
5421                 if (ret)
5422                         goto out;
5423         }
5424
5425         tr->current_trace = t;
5426         tr->current_trace->enabled++;
5427         trace_branch_enable(tr);
5428  out:
5429         mutex_unlock(&trace_types_lock);
5430
5431         return ret;
5432 }
5433
5434 static ssize_t
5435 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5436                         size_t cnt, loff_t *ppos)
5437 {
5438         struct trace_array *tr = filp->private_data;
5439         char buf[MAX_TRACER_SIZE+1];
5440         int i;
5441         size_t ret;
5442         int err;
5443
5444         ret = cnt;
5445
5446         if (cnt > MAX_TRACER_SIZE)
5447                 cnt = MAX_TRACER_SIZE;
5448
5449         if (copy_from_user(buf, ubuf, cnt))
5450                 return -EFAULT;
5451
5452         buf[cnt] = 0;
5453
5454         /* strip ending whitespace. */
5455         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5456                 buf[i] = 0;
5457
5458         err = tracing_set_tracer(tr, buf);
5459         if (err)
5460                 return err;
5461
5462         *ppos += ret;
5463
5464         return ret;
5465 }
5466
5467 static ssize_t
5468 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5469                    size_t cnt, loff_t *ppos)
5470 {
5471         char buf[64];
5472         int r;
5473
5474         r = snprintf(buf, sizeof(buf), "%ld\n",
5475                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5476         if (r > sizeof(buf))
5477                 r = sizeof(buf);
5478         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5479 }
5480
5481 static ssize_t
5482 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5483                     size_t cnt, loff_t *ppos)
5484 {
5485         unsigned long val;
5486         int ret;
5487
5488         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5489         if (ret)
5490                 return ret;
5491
5492         *ptr = val * 1000;
5493
5494         return cnt;
5495 }
5496
5497 static ssize_t
5498 tracing_thresh_read(struct file *filp, char __user *ubuf,
5499                     size_t cnt, loff_t *ppos)
5500 {
5501         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5502 }
5503
5504 static ssize_t
5505 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5506                      size_t cnt, loff_t *ppos)
5507 {
5508         struct trace_array *tr = filp->private_data;
5509         int ret;
5510
5511         mutex_lock(&trace_types_lock);
5512         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5513         if (ret < 0)
5514                 goto out;
5515
5516         if (tr->current_trace->update_thresh) {
5517                 ret = tr->current_trace->update_thresh(tr);
5518                 if (ret < 0)
5519                         goto out;
5520         }
5521
5522         ret = cnt;
5523 out:
5524         mutex_unlock(&trace_types_lock);
5525
5526         return ret;
5527 }
5528
5529 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5530
5531 static ssize_t
5532 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5533                      size_t cnt, loff_t *ppos)
5534 {
5535         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5536 }
5537
5538 static ssize_t
5539 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5540                       size_t cnt, loff_t *ppos)
5541 {
5542         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5543 }
5544
5545 #endif
5546
5547 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5548 {
5549         struct trace_array *tr = inode->i_private;
5550         struct trace_iterator *iter;
5551         int ret = 0;
5552
5553         if (tracing_disabled)
5554                 return -ENODEV;
5555
5556         if (trace_array_get(tr) < 0)
5557                 return -ENODEV;
5558
5559         mutex_lock(&trace_types_lock);
5560
5561         /* create a buffer to store the information to pass to userspace */
5562         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5563         if (!iter) {
5564                 ret = -ENOMEM;
5565                 __trace_array_put(tr);
5566                 goto out;
5567         }
5568
5569         trace_seq_init(&iter->seq);
5570         iter->trace = tr->current_trace;
5571
5572         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5573                 ret = -ENOMEM;
5574                 goto fail;
5575         }
5576
5577         /* trace pipe does not show start of buffer */
5578         cpumask_setall(iter->started);
5579
5580         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5581                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5582
5583         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5584         if (trace_clocks[tr->clock_id].in_ns)
5585                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5586
5587         iter->tr = tr;
5588         iter->trace_buffer = &tr->trace_buffer;
5589         iter->cpu_file = tracing_get_cpu(inode);
5590         mutex_init(&iter->mutex);
5591         filp->private_data = iter;
5592
5593         if (iter->trace->pipe_open)
5594                 iter->trace->pipe_open(iter);
5595
5596         nonseekable_open(inode, filp);
5597
5598         tr->current_trace->ref++;
5599 out:
5600         mutex_unlock(&trace_types_lock);
5601         return ret;
5602
5603 fail:
5604         kfree(iter);
5605         __trace_array_put(tr);
5606         mutex_unlock(&trace_types_lock);
5607         return ret;
5608 }
5609
5610 static int tracing_release_pipe(struct inode *inode, struct file *file)
5611 {
5612         struct trace_iterator *iter = file->private_data;
5613         struct trace_array *tr = inode->i_private;
5614
5615         mutex_lock(&trace_types_lock);
5616
5617         tr->current_trace->ref--;
5618
5619         if (iter->trace->pipe_close)
5620                 iter->trace->pipe_close(iter);
5621
5622         mutex_unlock(&trace_types_lock);
5623
5624         free_cpumask_var(iter->started);
5625         mutex_destroy(&iter->mutex);
5626         kfree(iter);
5627
5628         trace_array_put(tr);
5629
5630         return 0;
5631 }
5632
5633 static unsigned int
5634 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5635 {
5636         struct trace_array *tr = iter->tr;
5637
5638         /* Iterators are static, they should be filled or empty */
5639         if (trace_buffer_iter(iter, iter->cpu_file))
5640                 return POLLIN | POLLRDNORM;
5641
5642         if (tr->trace_flags & TRACE_ITER_BLOCK)
5643                 /*
5644                  * Always select as readable when in blocking mode
5645                  */
5646                 return POLLIN | POLLRDNORM;
5647         else
5648                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5649                                              filp, poll_table);
5650 }
5651
5652 static unsigned int
5653 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5654 {
5655         struct trace_iterator *iter = filp->private_data;
5656
5657         return trace_poll(iter, filp, poll_table);
5658 }
5659
5660 /* Must be called with iter->mutex held. */
5661 static int tracing_wait_pipe(struct file *filp)
5662 {
5663         struct trace_iterator *iter = filp->private_data;
5664         int ret;
5665
5666         while (trace_empty(iter)) {
5667
5668                 if ((filp->f_flags & O_NONBLOCK)) {
5669                         return -EAGAIN;
5670                 }
5671
5672                 /*
5673                  * We block until we read something and tracing is disabled.
5674                  * We still block if tracing is disabled, but we have never
5675                  * read anything. This allows a user to cat this file, and
5676                  * then enable tracing. But after we have read something,
5677                  * we give an EOF when tracing is again disabled.
5678                  *
5679                  * iter->pos will be 0 if we haven't read anything.
5680                  */
5681                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5682                         break;
5683
5684                 mutex_unlock(&iter->mutex);
5685
5686                 ret = wait_on_pipe(iter, false);
5687
5688                 mutex_lock(&iter->mutex);
5689
5690                 if (ret)
5691                         return ret;
5692         }
5693
5694         return 1;
5695 }
5696
5697 /*
5698  * Consumer reader.
5699  */
5700 static ssize_t
5701 tracing_read_pipe(struct file *filp, char __user *ubuf,
5702                   size_t cnt, loff_t *ppos)
5703 {
5704         struct trace_iterator *iter = filp->private_data;
5705         ssize_t sret;
5706
5707         /*
5708          * Avoid more than one consumer on a single file descriptor
5709          * This is just a matter of traces coherency, the ring buffer itself
5710          * is protected.
5711          */
5712         mutex_lock(&iter->mutex);
5713
5714         /* return any leftover data */
5715         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5716         if (sret != -EBUSY)
5717                 goto out;
5718
5719         trace_seq_init(&iter->seq);
5720
5721         if (iter->trace->read) {
5722                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5723                 if (sret)
5724                         goto out;
5725         }
5726
5727 waitagain:
5728         sret = tracing_wait_pipe(filp);
5729         if (sret <= 0)
5730                 goto out;
5731
5732         /* stop when tracing is finished */
5733         if (trace_empty(iter)) {
5734                 sret = 0;
5735                 goto out;
5736         }
5737
5738         if (cnt >= PAGE_SIZE)
5739                 cnt = PAGE_SIZE - 1;
5740
5741         /* reset all but tr, trace, and overruns */
5742         memset(&iter->seq, 0,
5743                sizeof(struct trace_iterator) -
5744                offsetof(struct trace_iterator, seq));
5745         cpumask_clear(iter->started);
5746         trace_seq_init(&iter->seq);
5747         iter->pos = -1;
5748
5749         trace_event_read_lock();
5750         trace_access_lock(iter->cpu_file);
5751         while (trace_find_next_entry_inc(iter) != NULL) {
5752                 enum print_line_t ret;
5753                 int save_len = iter->seq.seq.len;
5754
5755                 ret = print_trace_line(iter);
5756                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5757                         /* don't print partial lines */
5758                         iter->seq.seq.len = save_len;
5759                         break;
5760                 }
5761                 if (ret != TRACE_TYPE_NO_CONSUME)
5762                         trace_consume(iter);
5763
5764                 if (trace_seq_used(&iter->seq) >= cnt)
5765                         break;
5766
5767                 /*
5768                  * Setting the full flag means we reached the trace_seq buffer
5769                  * size and we should leave by partial output condition above.
5770                  * One of the trace_seq_* functions is not used properly.
5771                  */
5772                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5773                           iter->ent->type);
5774         }
5775         trace_access_unlock(iter->cpu_file);
5776         trace_event_read_unlock();
5777
5778         /* Now copy what we have to the user */
5779         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5780         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5781                 trace_seq_init(&iter->seq);
5782
5783         /*
5784          * If there was nothing to send to user, in spite of consuming trace
5785          * entries, go back to wait for more entries.
5786          */
5787         if (sret == -EBUSY)
5788                 goto waitagain;
5789
5790 out:
5791         mutex_unlock(&iter->mutex);
5792
5793         return sret;
5794 }
5795
5796 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5797                                      unsigned int idx)
5798 {
5799         __free_page(spd->pages[idx]);
5800 }
5801
5802 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5803         .can_merge              = 0,
5804         .confirm                = generic_pipe_buf_confirm,
5805         .release                = generic_pipe_buf_release,
5806         .steal                  = generic_pipe_buf_steal,
5807         .get                    = generic_pipe_buf_get,
5808 };
5809
5810 static size_t
5811 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5812 {
5813         size_t count;
5814         int save_len;
5815         int ret;
5816
5817         /* Seq buffer is page-sized, exactly what we need. */
5818         for (;;) {
5819                 save_len = iter->seq.seq.len;
5820                 ret = print_trace_line(iter);
5821
5822                 if (trace_seq_has_overflowed(&iter->seq)) {
5823                         iter->seq.seq.len = save_len;
5824                         break;
5825                 }
5826
5827                 /*
5828                  * This should not be hit, because it should only
5829                  * be set if the iter->seq overflowed. But check it
5830                  * anyway to be safe.
5831                  */
5832                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5833                         iter->seq.seq.len = save_len;
5834                         break;
5835                 }
5836
5837                 count = trace_seq_used(&iter->seq) - save_len;
5838                 if (rem < count) {
5839                         rem = 0;
5840                         iter->seq.seq.len = save_len;
5841                         break;
5842                 }
5843
5844                 if (ret != TRACE_TYPE_NO_CONSUME)
5845                         trace_consume(iter);
5846                 rem -= count;
5847                 if (!trace_find_next_entry_inc(iter))   {
5848                         rem = 0;
5849                         iter->ent = NULL;
5850                         break;
5851                 }
5852         }
5853
5854         return rem;
5855 }
5856
5857 static ssize_t tracing_splice_read_pipe(struct file *filp,
5858                                         loff_t *ppos,
5859                                         struct pipe_inode_info *pipe,
5860                                         size_t len,
5861                                         unsigned int flags)
5862 {
5863         struct page *pages_def[PIPE_DEF_BUFFERS];
5864         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5865         struct trace_iterator *iter = filp->private_data;
5866         struct splice_pipe_desc spd = {
5867                 .pages          = pages_def,
5868                 .partial        = partial_def,
5869                 .nr_pages       = 0, /* This gets updated below. */
5870                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5871                 .ops            = &tracing_pipe_buf_ops,
5872                 .spd_release    = tracing_spd_release_pipe,
5873         };
5874         ssize_t ret;
5875         size_t rem;
5876         unsigned int i;
5877
5878         if (splice_grow_spd(pipe, &spd))
5879                 return -ENOMEM;
5880
5881         mutex_lock(&iter->mutex);
5882
5883         if (iter->trace->splice_read) {
5884                 ret = iter->trace->splice_read(iter, filp,
5885                                                ppos, pipe, len, flags);
5886                 if (ret)
5887                         goto out_err;
5888         }
5889
5890         ret = tracing_wait_pipe(filp);
5891         if (ret <= 0)
5892                 goto out_err;
5893
5894         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5895                 ret = -EFAULT;
5896                 goto out_err;
5897         }
5898
5899         trace_event_read_lock();
5900         trace_access_lock(iter->cpu_file);
5901
5902         /* Fill as many pages as possible. */
5903         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5904                 spd.pages[i] = alloc_page(GFP_KERNEL);
5905                 if (!spd.pages[i])
5906                         break;
5907
5908                 rem = tracing_fill_pipe_page(rem, iter);
5909
5910                 /* Copy the data into the page, so we can start over. */
5911                 ret = trace_seq_to_buffer(&iter->seq,
5912                                           page_address(spd.pages[i]),
5913                                           trace_seq_used(&iter->seq));
5914                 if (ret < 0) {
5915                         __free_page(spd.pages[i]);
5916                         break;
5917                 }
5918                 spd.partial[i].offset = 0;
5919                 spd.partial[i].len = trace_seq_used(&iter->seq);
5920
5921                 trace_seq_init(&iter->seq);
5922         }
5923
5924         trace_access_unlock(iter->cpu_file);
5925         trace_event_read_unlock();
5926         mutex_unlock(&iter->mutex);
5927
5928         spd.nr_pages = i;
5929
5930         if (i)
5931                 ret = splice_to_pipe(pipe, &spd);
5932         else
5933                 ret = 0;
5934 out:
5935         splice_shrink_spd(&spd);
5936         return ret;
5937
5938 out_err:
5939         mutex_unlock(&iter->mutex);
5940         goto out;
5941 }
5942
5943 static ssize_t
5944 tracing_entries_read(struct file *filp, char __user *ubuf,
5945                      size_t cnt, loff_t *ppos)
5946 {
5947         struct inode *inode = file_inode(filp);
5948         struct trace_array *tr = inode->i_private;
5949         int cpu = tracing_get_cpu(inode);
5950         char buf[64];
5951         int r = 0;
5952         ssize_t ret;
5953
5954         mutex_lock(&trace_types_lock);
5955
5956         if (cpu == RING_BUFFER_ALL_CPUS) {
5957                 int cpu, buf_size_same;
5958                 unsigned long size;
5959
5960                 size = 0;
5961                 buf_size_same = 1;
5962                 /* check if all cpu sizes are same */
5963                 for_each_tracing_cpu(cpu) {
5964                         /* fill in the size from first enabled cpu */
5965                         if (size == 0)
5966                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5967                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5968                                 buf_size_same = 0;
5969                                 break;
5970                         }
5971                 }
5972
5973                 if (buf_size_same) {
5974                         if (!ring_buffer_expanded)
5975                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5976                                             size >> 10,
5977                                             trace_buf_size >> 10);
5978                         else
5979                                 r = sprintf(buf, "%lu\n", size >> 10);
5980                 } else
5981                         r = sprintf(buf, "X\n");
5982         } else
5983                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5984
5985         mutex_unlock(&trace_types_lock);
5986
5987         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5988         return ret;
5989 }
5990
5991 static ssize_t
5992 tracing_entries_write(struct file *filp, const char __user *ubuf,
5993                       size_t cnt, loff_t *ppos)
5994 {
5995         struct inode *inode = file_inode(filp);
5996         struct trace_array *tr = inode->i_private;
5997         unsigned long val;
5998         int ret;
5999
6000         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6001         if (ret)
6002                 return ret;
6003
6004         /* must have at least 1 entry */
6005         if (!val)
6006                 return -EINVAL;
6007
6008         /* value is in KB */
6009         val <<= 10;
6010         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6011         if (ret < 0)
6012                 return ret;
6013
6014         *ppos += cnt;
6015
6016         return cnt;
6017 }
6018
6019 static ssize_t
6020 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6021                                 size_t cnt, loff_t *ppos)
6022 {
6023         struct trace_array *tr = filp->private_data;
6024         char buf[64];
6025         int r, cpu;
6026         unsigned long size = 0, expanded_size = 0;
6027
6028         mutex_lock(&trace_types_lock);
6029         for_each_tracing_cpu(cpu) {
6030                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6031                 if (!ring_buffer_expanded)
6032                         expanded_size += trace_buf_size >> 10;
6033         }
6034         if (ring_buffer_expanded)
6035                 r = sprintf(buf, "%lu\n", size);
6036         else
6037                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6038         mutex_unlock(&trace_types_lock);
6039
6040         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6041 }
6042
6043 static ssize_t
6044 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6045                           size_t cnt, loff_t *ppos)
6046 {
6047         /*
6048          * There is no need to read what the user has written, this function
6049          * is just to make sure that there is no error when "echo" is used
6050          */
6051
6052         *ppos += cnt;
6053
6054         return cnt;
6055 }
6056
6057 static int
6058 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6059 {
6060         struct trace_array *tr = inode->i_private;
6061
6062         /* disable tracing ? */
6063         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6064                 tracer_tracing_off(tr);
6065         /* resize the ring buffer to 0 */
6066         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6067
6068         trace_array_put(tr);
6069
6070         return 0;
6071 }
6072
6073 static ssize_t
6074 tracing_mark_write(struct file *filp, const char __user *ubuf,
6075                                         size_t cnt, loff_t *fpos)
6076 {
6077         struct trace_array *tr = filp->private_data;
6078         struct ring_buffer_event *event;
6079         struct ring_buffer *buffer;
6080         struct print_entry *entry;
6081         unsigned long irq_flags;
6082         const char faulted[] = "<faulted>";
6083         ssize_t written;
6084         int size;
6085         int len;
6086
6087 /* Used in tracing_mark_raw_write() as well */
6088 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6089
6090         if (tracing_disabled)
6091                 return -EINVAL;
6092
6093         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6094                 return -EINVAL;
6095
6096         if (cnt > TRACE_BUF_SIZE)
6097                 cnt = TRACE_BUF_SIZE;
6098
6099         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6100
6101         local_save_flags(irq_flags);
6102         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6103
6104         /* If less than "<faulted>", then make sure we can still add that */
6105         if (cnt < FAULTED_SIZE)
6106                 size += FAULTED_SIZE - cnt;
6107
6108         buffer = tr->trace_buffer.buffer;
6109         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6110                                             irq_flags, preempt_count());
6111         if (unlikely(!event))
6112                 /* Ring buffer disabled, return as if not open for write */
6113                 return -EBADF;
6114
6115         entry = ring_buffer_event_data(event);
6116         entry->ip = _THIS_IP_;
6117
6118         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6119         if (len) {
6120                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6121                 cnt = FAULTED_SIZE;
6122                 written = -EFAULT;
6123         } else
6124                 written = cnt;
6125         len = cnt;
6126
6127         if (entry->buf[cnt - 1] != '\n') {
6128                 entry->buf[cnt] = '\n';
6129                 entry->buf[cnt + 1] = '\0';
6130         } else
6131                 entry->buf[cnt] = '\0';
6132
6133         __buffer_unlock_commit(buffer, event);
6134
6135         if (written > 0)
6136                 *fpos += written;
6137
6138         return written;
6139 }
6140
6141 /* Limit it for now to 3K (including tag) */
6142 #define RAW_DATA_MAX_SIZE (1024*3)
6143
6144 static ssize_t
6145 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6146                                         size_t cnt, loff_t *fpos)
6147 {
6148         struct trace_array *tr = filp->private_data;
6149         struct ring_buffer_event *event;
6150         struct ring_buffer *buffer;
6151         struct raw_data_entry *entry;
6152         const char faulted[] = "<faulted>";
6153         unsigned long irq_flags;
6154         ssize_t written;
6155         int size;
6156         int len;
6157
6158 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6159
6160         if (tracing_disabled)
6161                 return -EINVAL;
6162
6163         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6164                 return -EINVAL;
6165
6166         /* The marker must at least have a tag id */
6167         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6168                 return -EINVAL;
6169
6170         if (cnt > TRACE_BUF_SIZE)
6171                 cnt = TRACE_BUF_SIZE;
6172
6173         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6174
6175         local_save_flags(irq_flags);
6176         size = sizeof(*entry) + cnt;
6177         if (cnt < FAULT_SIZE_ID)
6178                 size += FAULT_SIZE_ID - cnt;
6179
6180         buffer = tr->trace_buffer.buffer;
6181         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6182                                             irq_flags, preempt_count());
6183         if (!event)
6184                 /* Ring buffer disabled, return as if not open for write */
6185                 return -EBADF;
6186
6187         entry = ring_buffer_event_data(event);
6188
6189         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6190         if (len) {
6191                 entry->id = -1;
6192                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6193                 written = -EFAULT;
6194         } else
6195                 written = cnt;
6196
6197         __buffer_unlock_commit(buffer, event);
6198
6199         if (written > 0)
6200                 *fpos += written;
6201
6202         return written;
6203 }
6204
6205 static int tracing_clock_show(struct seq_file *m, void *v)
6206 {
6207         struct trace_array *tr = m->private;
6208         int i;
6209
6210         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6211                 seq_printf(m,
6212                         "%s%s%s%s", i ? " " : "",
6213                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6214                         i == tr->clock_id ? "]" : "");
6215         seq_putc(m, '\n');
6216
6217         return 0;
6218 }
6219
6220 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6221 {
6222         int i;
6223
6224         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6225                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6226                         break;
6227         }
6228         if (i == ARRAY_SIZE(trace_clocks))
6229                 return -EINVAL;
6230
6231         mutex_lock(&trace_types_lock);
6232
6233         tr->clock_id = i;
6234
6235         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6236
6237         /*
6238          * New clock may not be consistent with the previous clock.
6239          * Reset the buffer so that it doesn't have incomparable timestamps.
6240          */
6241         tracing_reset_online_cpus(&tr->trace_buffer);
6242
6243 #ifdef CONFIG_TRACER_MAX_TRACE
6244         if (tr->max_buffer.buffer)
6245                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6246         tracing_reset_online_cpus(&tr->max_buffer);
6247 #endif
6248
6249         mutex_unlock(&trace_types_lock);
6250
6251         return 0;
6252 }
6253
6254 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6255                                    size_t cnt, loff_t *fpos)
6256 {
6257         struct seq_file *m = filp->private_data;
6258         struct trace_array *tr = m->private;
6259         char buf[64];
6260         const char *clockstr;
6261         int ret;
6262
6263         if (cnt >= sizeof(buf))
6264                 return -EINVAL;
6265
6266         if (copy_from_user(buf, ubuf, cnt))
6267                 return -EFAULT;
6268
6269         buf[cnt] = 0;
6270
6271         clockstr = strstrip(buf);
6272
6273         ret = tracing_set_clock(tr, clockstr);
6274         if (ret)
6275                 return ret;
6276
6277         *fpos += cnt;
6278
6279         return cnt;
6280 }
6281
6282 static int tracing_clock_open(struct inode *inode, struct file *file)
6283 {
6284         struct trace_array *tr = inode->i_private;
6285         int ret;
6286
6287         if (tracing_disabled)
6288                 return -ENODEV;
6289
6290         if (trace_array_get(tr))
6291                 return -ENODEV;
6292
6293         ret = single_open(file, tracing_clock_show, inode->i_private);
6294         if (ret < 0)
6295                 trace_array_put(tr);
6296
6297         return ret;
6298 }
6299
6300 struct ftrace_buffer_info {
6301         struct trace_iterator   iter;
6302         void                    *spare;
6303         unsigned int            spare_cpu;
6304         unsigned int            read;
6305 };
6306
6307 #ifdef CONFIG_TRACER_SNAPSHOT
6308 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6309 {
6310         struct trace_array *tr = inode->i_private;
6311         struct trace_iterator *iter;
6312         struct seq_file *m;
6313         int ret = 0;
6314
6315         if (trace_array_get(tr) < 0)
6316                 return -ENODEV;
6317
6318         if (file->f_mode & FMODE_READ) {
6319                 iter = __tracing_open(inode, file, true);
6320                 if (IS_ERR(iter))
6321                         ret = PTR_ERR(iter);
6322         } else {
6323                 /* Writes still need the seq_file to hold the private data */
6324                 ret = -ENOMEM;
6325                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6326                 if (!m)
6327                         goto out;
6328                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6329                 if (!iter) {
6330                         kfree(m);
6331                         goto out;
6332                 }
6333                 ret = 0;
6334
6335                 iter->tr = tr;
6336                 iter->trace_buffer = &tr->max_buffer;
6337                 iter->cpu_file = tracing_get_cpu(inode);
6338                 m->private = iter;
6339                 file->private_data = m;
6340         }
6341 out:
6342         if (ret < 0)
6343                 trace_array_put(tr);
6344
6345         return ret;
6346 }
6347
6348 static ssize_t
6349 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6350                        loff_t *ppos)
6351 {
6352         struct seq_file *m = filp->private_data;
6353         struct trace_iterator *iter = m->private;
6354         struct trace_array *tr = iter->tr;
6355         unsigned long val;
6356         int ret;
6357
6358         ret = tracing_update_buffers();
6359         if (ret < 0)
6360                 return ret;
6361
6362         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6363         if (ret)
6364                 return ret;
6365
6366         mutex_lock(&trace_types_lock);
6367
6368         if (tr->current_trace->use_max_tr) {
6369                 ret = -EBUSY;
6370                 goto out;
6371         }
6372
6373         switch (val) {
6374         case 0:
6375                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6376                         ret = -EINVAL;
6377                         break;
6378                 }
6379                 if (tr->allocated_snapshot)
6380                         free_snapshot(tr);
6381                 break;
6382         case 1:
6383 /* Only allow per-cpu swap if the ring buffer supports it */
6384 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6385                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6386                         ret = -EINVAL;
6387                         break;
6388                 }
6389 #endif
6390                 if (tr->allocated_snapshot)
6391                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6392                                         &tr->trace_buffer, iter->cpu_file);
6393                 else
6394                         ret = tracing_alloc_snapshot_instance(tr);
6395                 if (ret < 0)
6396                         break;
6397                 local_irq_disable();
6398                 /* Now, we're going to swap */
6399                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6400                         update_max_tr(tr, current, smp_processor_id());
6401                 else
6402                         update_max_tr_single(tr, current, iter->cpu_file);
6403                 local_irq_enable();
6404                 break;
6405         default:
6406                 if (tr->allocated_snapshot) {
6407                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6408                                 tracing_reset_online_cpus(&tr->max_buffer);
6409                         else
6410                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6411                 }
6412                 break;
6413         }
6414
6415         if (ret >= 0) {
6416                 *ppos += cnt;
6417                 ret = cnt;
6418         }
6419 out:
6420         mutex_unlock(&trace_types_lock);
6421         return ret;
6422 }
6423
6424 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6425 {
6426         struct seq_file *m = file->private_data;
6427         int ret;
6428
6429         ret = tracing_release(inode, file);
6430
6431         if (file->f_mode & FMODE_READ)
6432                 return ret;
6433
6434         /* If write only, the seq_file is just a stub */
6435         if (m)
6436                 kfree(m->private);
6437         kfree(m);
6438
6439         return 0;
6440 }
6441
6442 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6443 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6444                                     size_t count, loff_t *ppos);
6445 static int tracing_buffers_release(struct inode *inode, struct file *file);
6446 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6447                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6448
6449 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6450 {
6451         struct ftrace_buffer_info *info;
6452         int ret;
6453
6454         ret = tracing_buffers_open(inode, filp);
6455         if (ret < 0)
6456                 return ret;
6457
6458         info = filp->private_data;
6459
6460         if (info->iter.trace->use_max_tr) {
6461                 tracing_buffers_release(inode, filp);
6462                 return -EBUSY;
6463         }
6464
6465         info->iter.snapshot = true;
6466         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6467
6468         return ret;
6469 }
6470
6471 #endif /* CONFIG_TRACER_SNAPSHOT */
6472
6473
6474 static const struct file_operations tracing_thresh_fops = {
6475         .open           = tracing_open_generic,
6476         .read           = tracing_thresh_read,
6477         .write          = tracing_thresh_write,
6478         .llseek         = generic_file_llseek,
6479 };
6480
6481 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6482 static const struct file_operations tracing_max_lat_fops = {
6483         .open           = tracing_open_generic,
6484         .read           = tracing_max_lat_read,
6485         .write          = tracing_max_lat_write,
6486         .llseek         = generic_file_llseek,
6487 };
6488 #endif
6489
6490 static const struct file_operations set_tracer_fops = {
6491         .open           = tracing_open_generic,
6492         .read           = tracing_set_trace_read,
6493         .write          = tracing_set_trace_write,
6494         .llseek         = generic_file_llseek,
6495 };
6496
6497 static const struct file_operations tracing_pipe_fops = {
6498         .open           = tracing_open_pipe,
6499         .poll           = tracing_poll_pipe,
6500         .read           = tracing_read_pipe,
6501         .splice_read    = tracing_splice_read_pipe,
6502         .release        = tracing_release_pipe,
6503         .llseek         = no_llseek,
6504 };
6505
6506 static const struct file_operations tracing_entries_fops = {
6507         .open           = tracing_open_generic_tr,
6508         .read           = tracing_entries_read,
6509         .write          = tracing_entries_write,
6510         .llseek         = generic_file_llseek,
6511         .release        = tracing_release_generic_tr,
6512 };
6513
6514 static const struct file_operations tracing_total_entries_fops = {
6515         .open           = tracing_open_generic_tr,
6516         .read           = tracing_total_entries_read,
6517         .llseek         = generic_file_llseek,
6518         .release        = tracing_release_generic_tr,
6519 };
6520
6521 static const struct file_operations tracing_free_buffer_fops = {
6522         .open           = tracing_open_generic_tr,
6523         .write          = tracing_free_buffer_write,
6524         .release        = tracing_free_buffer_release,
6525 };
6526
6527 static const struct file_operations tracing_mark_fops = {
6528         .open           = tracing_open_generic_tr,
6529         .write          = tracing_mark_write,
6530         .llseek         = generic_file_llseek,
6531         .release        = tracing_release_generic_tr,
6532 };
6533
6534 static const struct file_operations tracing_mark_raw_fops = {
6535         .open           = tracing_open_generic_tr,
6536         .write          = tracing_mark_raw_write,
6537         .llseek         = generic_file_llseek,
6538         .release        = tracing_release_generic_tr,
6539 };
6540
6541 static const struct file_operations trace_clock_fops = {
6542         .open           = tracing_clock_open,
6543         .read           = seq_read,
6544         .llseek         = seq_lseek,
6545         .release        = tracing_single_release_tr,
6546         .write          = tracing_clock_write,
6547 };
6548
6549 #ifdef CONFIG_TRACER_SNAPSHOT
6550 static const struct file_operations snapshot_fops = {
6551         .open           = tracing_snapshot_open,
6552         .read           = seq_read,
6553         .write          = tracing_snapshot_write,
6554         .llseek         = tracing_lseek,
6555         .release        = tracing_snapshot_release,
6556 };
6557
6558 static const struct file_operations snapshot_raw_fops = {
6559         .open           = snapshot_raw_open,
6560         .read           = tracing_buffers_read,
6561         .release        = tracing_buffers_release,
6562         .splice_read    = tracing_buffers_splice_read,
6563         .llseek         = no_llseek,
6564 };
6565
6566 #endif /* CONFIG_TRACER_SNAPSHOT */
6567
6568 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6569 {
6570         struct trace_array *tr = inode->i_private;
6571         struct ftrace_buffer_info *info;
6572         int ret;
6573
6574         if (tracing_disabled)
6575                 return -ENODEV;
6576
6577         if (trace_array_get(tr) < 0)
6578                 return -ENODEV;
6579
6580         info = kzalloc(sizeof(*info), GFP_KERNEL);
6581         if (!info) {
6582                 trace_array_put(tr);
6583                 return -ENOMEM;
6584         }
6585
6586         mutex_lock(&trace_types_lock);
6587
6588         info->iter.tr           = tr;
6589         info->iter.cpu_file     = tracing_get_cpu(inode);
6590         info->iter.trace        = tr->current_trace;
6591         info->iter.trace_buffer = &tr->trace_buffer;
6592         info->spare             = NULL;
6593         /* Force reading ring buffer for first read */
6594         info->read              = (unsigned int)-1;
6595
6596         filp->private_data = info;
6597
6598         tr->current_trace->ref++;
6599
6600         mutex_unlock(&trace_types_lock);
6601
6602         ret = nonseekable_open(inode, filp);
6603         if (ret < 0)
6604                 trace_array_put(tr);
6605
6606         return ret;
6607 }
6608
6609 static unsigned int
6610 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6611 {
6612         struct ftrace_buffer_info *info = filp->private_data;
6613         struct trace_iterator *iter = &info->iter;
6614
6615         return trace_poll(iter, filp, poll_table);
6616 }
6617
6618 static ssize_t
6619 tracing_buffers_read(struct file *filp, char __user *ubuf,
6620                      size_t count, loff_t *ppos)
6621 {
6622         struct ftrace_buffer_info *info = filp->private_data;
6623         struct trace_iterator *iter = &info->iter;
6624         ssize_t ret = 0;
6625         ssize_t size;
6626
6627         if (!count)
6628                 return 0;
6629
6630 #ifdef CONFIG_TRACER_MAX_TRACE
6631         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6632                 return -EBUSY;
6633 #endif
6634
6635         if (!info->spare) {
6636                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6637                                                           iter->cpu_file);
6638                 if (IS_ERR(info->spare)) {
6639                         ret = PTR_ERR(info->spare);
6640                         info->spare = NULL;
6641                 } else {
6642                         info->spare_cpu = iter->cpu_file;
6643                 }
6644         }
6645         if (!info->spare)
6646                 return ret;
6647
6648         /* Do we have previous read data to read? */
6649         if (info->read < PAGE_SIZE)
6650                 goto read;
6651
6652  again:
6653         trace_access_lock(iter->cpu_file);
6654         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6655                                     &info->spare,
6656                                     count,
6657                                     iter->cpu_file, 0);
6658         trace_access_unlock(iter->cpu_file);
6659
6660         if (ret < 0) {
6661                 if (trace_empty(iter)) {
6662                         if ((filp->f_flags & O_NONBLOCK))
6663                                 return -EAGAIN;
6664
6665                         ret = wait_on_pipe(iter, false);
6666                         if (ret)
6667                                 return ret;
6668
6669                         goto again;
6670                 }
6671                 return 0;
6672         }
6673
6674         info->read = 0;
6675  read:
6676         size = PAGE_SIZE - info->read;
6677         if (size > count)
6678                 size = count;
6679
6680         ret = copy_to_user(ubuf, info->spare + info->read, size);
6681         if (ret == size)
6682                 return -EFAULT;
6683
6684         size -= ret;
6685
6686         *ppos += size;
6687         info->read += size;
6688
6689         return size;
6690 }
6691
6692 static int tracing_buffers_release(struct inode *inode, struct file *file)
6693 {
6694         struct ftrace_buffer_info *info = file->private_data;
6695         struct trace_iterator *iter = &info->iter;
6696
6697         mutex_lock(&trace_types_lock);
6698
6699         iter->tr->current_trace->ref--;
6700
6701         __trace_array_put(iter->tr);
6702
6703         if (info->spare)
6704                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6705                                            info->spare_cpu, info->spare);
6706         kfree(info);
6707
6708         mutex_unlock(&trace_types_lock);
6709
6710         return 0;
6711 }
6712
6713 struct buffer_ref {
6714         struct ring_buffer      *buffer;
6715         void                    *page;
6716         int                     cpu;
6717         refcount_t              refcount;
6718 };
6719
6720 static void buffer_ref_release(struct buffer_ref *ref)
6721 {
6722         if (!refcount_dec_and_test(&ref->refcount))
6723                 return;
6724         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6725         kfree(ref);
6726 }
6727
6728 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6729                                     struct pipe_buffer *buf)
6730 {
6731         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6732
6733         buffer_ref_release(ref);
6734         buf->private = 0;
6735 }
6736
6737 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6738                                 struct pipe_buffer *buf)
6739 {
6740         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6741
6742         if (refcount_read(&ref->refcount) > INT_MAX/2)
6743                 return false;
6744
6745         refcount_inc(&ref->refcount);
6746         return true;
6747 }
6748
6749 /* Pipe buffer operations for a buffer. */
6750 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6751         .can_merge              = 0,
6752         .confirm                = generic_pipe_buf_confirm,
6753         .release                = buffer_pipe_buf_release,
6754         .steal                  = generic_pipe_buf_nosteal,
6755         .get                    = buffer_pipe_buf_get,
6756 };
6757
6758 /*
6759  * Callback from splice_to_pipe(), if we need to release some pages
6760  * at the end of the spd in case we error'ed out in filling the pipe.
6761  */
6762 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6763 {
6764         struct buffer_ref *ref =
6765                 (struct buffer_ref *)spd->partial[i].private;
6766
6767         buffer_ref_release(ref);
6768         spd->partial[i].private = 0;
6769 }
6770
6771 static ssize_t
6772 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6773                             struct pipe_inode_info *pipe, size_t len,
6774                             unsigned int flags)
6775 {
6776         struct ftrace_buffer_info *info = file->private_data;
6777         struct trace_iterator *iter = &info->iter;
6778         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6779         struct page *pages_def[PIPE_DEF_BUFFERS];
6780         struct splice_pipe_desc spd = {
6781                 .pages          = pages_def,
6782                 .partial        = partial_def,
6783                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6784                 .ops            = &buffer_pipe_buf_ops,
6785                 .spd_release    = buffer_spd_release,
6786         };
6787         struct buffer_ref *ref;
6788         int entries, i;
6789         ssize_t ret = 0;
6790
6791 #ifdef CONFIG_TRACER_MAX_TRACE
6792         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6793                 return -EBUSY;
6794 #endif
6795
6796         if (*ppos & (PAGE_SIZE - 1))
6797                 return -EINVAL;
6798
6799         if (len & (PAGE_SIZE - 1)) {
6800                 if (len < PAGE_SIZE)
6801                         return -EINVAL;
6802                 len &= PAGE_MASK;
6803         }
6804
6805         if (splice_grow_spd(pipe, &spd))
6806                 return -ENOMEM;
6807
6808  again:
6809         trace_access_lock(iter->cpu_file);
6810         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6811
6812         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6813                 struct page *page;
6814                 int r;
6815
6816                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6817                 if (!ref) {
6818                         ret = -ENOMEM;
6819                         break;
6820                 }
6821
6822                 refcount_set(&ref->refcount, 1);
6823                 ref->buffer = iter->trace_buffer->buffer;
6824                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6825                 if (IS_ERR(ref->page)) {
6826                         ret = PTR_ERR(ref->page);
6827                         ref->page = NULL;
6828                         kfree(ref);
6829                         break;
6830                 }
6831                 ref->cpu = iter->cpu_file;
6832
6833                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6834                                           len, iter->cpu_file, 1);
6835                 if (r < 0) {
6836                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6837                                                    ref->page);
6838                         kfree(ref);
6839                         break;
6840                 }
6841
6842                 page = virt_to_page(ref->page);
6843
6844                 spd.pages[i] = page;
6845                 spd.partial[i].len = PAGE_SIZE;
6846                 spd.partial[i].offset = 0;
6847                 spd.partial[i].private = (unsigned long)ref;
6848                 spd.nr_pages++;
6849                 *ppos += PAGE_SIZE;
6850
6851                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6852         }
6853
6854         trace_access_unlock(iter->cpu_file);
6855         spd.nr_pages = i;
6856
6857         /* did we read anything? */
6858         if (!spd.nr_pages) {
6859                 if (ret)
6860                         goto out;
6861
6862                 ret = -EAGAIN;
6863                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6864                         goto out;
6865
6866                 ret = wait_on_pipe(iter, true);
6867                 if (ret)
6868                         goto out;
6869
6870                 goto again;
6871         }
6872
6873         ret = splice_to_pipe(pipe, &spd);
6874 out:
6875         splice_shrink_spd(&spd);
6876
6877         return ret;
6878 }
6879
6880 static const struct file_operations tracing_buffers_fops = {
6881         .open           = tracing_buffers_open,
6882         .read           = tracing_buffers_read,
6883         .poll           = tracing_buffers_poll,
6884         .release        = tracing_buffers_release,
6885         .splice_read    = tracing_buffers_splice_read,
6886         .llseek         = no_llseek,
6887 };
6888
6889 static ssize_t
6890 tracing_stats_read(struct file *filp, char __user *ubuf,
6891                    size_t count, loff_t *ppos)
6892 {
6893         struct inode *inode = file_inode(filp);
6894         struct trace_array *tr = inode->i_private;
6895         struct trace_buffer *trace_buf = &tr->trace_buffer;
6896         int cpu = tracing_get_cpu(inode);
6897         struct trace_seq *s;
6898         unsigned long cnt;
6899         unsigned long long t;
6900         unsigned long usec_rem;
6901
6902         s = kmalloc(sizeof(*s), GFP_KERNEL);
6903         if (!s)
6904                 return -ENOMEM;
6905
6906         trace_seq_init(s);
6907
6908         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6909         trace_seq_printf(s, "entries: %ld\n", cnt);
6910
6911         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6912         trace_seq_printf(s, "overrun: %ld\n", cnt);
6913
6914         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6915         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6916
6917         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6918         trace_seq_printf(s, "bytes: %ld\n", cnt);
6919
6920         if (trace_clocks[tr->clock_id].in_ns) {
6921                 /* local or global for trace_clock */
6922                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6923                 usec_rem = do_div(t, USEC_PER_SEC);
6924                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6925                                                                 t, usec_rem);
6926
6927                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6928                 usec_rem = do_div(t, USEC_PER_SEC);
6929                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6930         } else {
6931                 /* counter or tsc mode for trace_clock */
6932                 trace_seq_printf(s, "oldest event ts: %llu\n",
6933                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6934
6935                 trace_seq_printf(s, "now ts: %llu\n",
6936                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6937         }
6938
6939         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6940         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6941
6942         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6943         trace_seq_printf(s, "read events: %ld\n", cnt);
6944
6945         count = simple_read_from_buffer(ubuf, count, ppos,
6946                                         s->buffer, trace_seq_used(s));
6947
6948         kfree(s);
6949
6950         return count;
6951 }
6952
6953 static const struct file_operations tracing_stats_fops = {
6954         .open           = tracing_open_generic_tr,
6955         .read           = tracing_stats_read,
6956         .llseek         = generic_file_llseek,
6957         .release        = tracing_release_generic_tr,
6958 };
6959
6960 #ifdef CONFIG_DYNAMIC_FTRACE
6961
6962 static ssize_t
6963 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6964                   size_t cnt, loff_t *ppos)
6965 {
6966         unsigned long *p = filp->private_data;
6967         char buf[64]; /* Not too big for a shallow stack */
6968         int r;
6969
6970         r = scnprintf(buf, 63, "%ld", *p);
6971         buf[r++] = '\n';
6972
6973         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6974 }
6975
6976 static const struct file_operations tracing_dyn_info_fops = {
6977         .open           = tracing_open_generic,
6978         .read           = tracing_read_dyn_info,
6979         .llseek         = generic_file_llseek,
6980 };
6981 #endif /* CONFIG_DYNAMIC_FTRACE */
6982
6983 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6984 static void
6985 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6986                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6987                 void *data)
6988 {
6989         tracing_snapshot_instance(tr);
6990 }
6991
6992 static void
6993 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6994                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6995                       void *data)
6996 {
6997         struct ftrace_func_mapper *mapper = data;
6998         long *count = NULL;
6999
7000         if (mapper)
7001                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7002
7003         if (count) {
7004
7005                 if (*count <= 0)
7006                         return;
7007
7008                 (*count)--;
7009         }
7010
7011         tracing_snapshot_instance(tr);
7012 }
7013
7014 static int
7015 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7016                       struct ftrace_probe_ops *ops, void *data)
7017 {
7018         struct ftrace_func_mapper *mapper = data;
7019         long *count = NULL;
7020
7021         seq_printf(m, "%ps:", (void *)ip);
7022
7023         seq_puts(m, "snapshot");
7024
7025         if (mapper)
7026                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7027
7028         if (count)
7029                 seq_printf(m, ":count=%ld\n", *count);
7030         else
7031                 seq_puts(m, ":unlimited\n");
7032
7033         return 0;
7034 }
7035
7036 static int
7037 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7038                      unsigned long ip, void *init_data, void **data)
7039 {
7040         struct ftrace_func_mapper *mapper = *data;
7041
7042         if (!mapper) {
7043                 mapper = allocate_ftrace_func_mapper();
7044                 if (!mapper)
7045                         return -ENOMEM;
7046                 *data = mapper;
7047         }
7048
7049         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7050 }
7051
7052 static void
7053 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7054                      unsigned long ip, void *data)
7055 {
7056         struct ftrace_func_mapper *mapper = data;
7057
7058         if (!ip) {
7059                 if (!mapper)
7060                         return;
7061                 free_ftrace_func_mapper(mapper, NULL);
7062                 return;
7063         }
7064
7065         ftrace_func_mapper_remove_ip(mapper, ip);
7066 }
7067
7068 static struct ftrace_probe_ops snapshot_probe_ops = {
7069         .func                   = ftrace_snapshot,
7070         .print                  = ftrace_snapshot_print,
7071 };
7072
7073 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7074         .func                   = ftrace_count_snapshot,
7075         .print                  = ftrace_snapshot_print,
7076         .init                   = ftrace_snapshot_init,
7077         .free                   = ftrace_snapshot_free,
7078 };
7079
7080 static int
7081 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7082                                char *glob, char *cmd, char *param, int enable)
7083 {
7084         struct ftrace_probe_ops *ops;
7085         void *count = (void *)-1;
7086         char *number;
7087         int ret;
7088
7089         if (!tr)
7090                 return -ENODEV;
7091
7092         /* hash funcs only work with set_ftrace_filter */
7093         if (!enable)
7094                 return -EINVAL;
7095
7096         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7097
7098         if (glob[0] == '!')
7099                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7100
7101         if (!param)
7102                 goto out_reg;
7103
7104         number = strsep(&param, ":");
7105
7106         if (!strlen(number))
7107                 goto out_reg;
7108
7109         /*
7110          * We use the callback data field (which is a pointer)
7111          * as our counter.
7112          */
7113         ret = kstrtoul(number, 0, (unsigned long *)&count);
7114         if (ret)
7115                 return ret;
7116
7117  out_reg:
7118         ret = tracing_alloc_snapshot_instance(tr);
7119         if (ret < 0)
7120                 goto out;
7121
7122         ret = register_ftrace_function_probe(glob, tr, ops, count);
7123
7124  out:
7125         return ret < 0 ? ret : 0;
7126 }
7127
7128 static struct ftrace_func_command ftrace_snapshot_cmd = {
7129         .name                   = "snapshot",
7130         .func                   = ftrace_trace_snapshot_callback,
7131 };
7132
7133 static __init int register_snapshot_cmd(void)
7134 {
7135         return register_ftrace_command(&ftrace_snapshot_cmd);
7136 }
7137 #else
7138 static inline __init int register_snapshot_cmd(void) { return 0; }
7139 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7140
7141 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7142 {
7143         if (WARN_ON(!tr->dir))
7144                 return ERR_PTR(-ENODEV);
7145
7146         /* Top directory uses NULL as the parent */
7147         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7148                 return NULL;
7149
7150         /* All sub buffers have a descriptor */
7151         return tr->dir;
7152 }
7153
7154 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7155 {
7156         struct dentry *d_tracer;
7157
7158         if (tr->percpu_dir)
7159                 return tr->percpu_dir;
7160
7161         d_tracer = tracing_get_dentry(tr);
7162         if (IS_ERR(d_tracer))
7163                 return NULL;
7164
7165         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7166
7167         WARN_ONCE(!tr->percpu_dir,
7168                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7169
7170         return tr->percpu_dir;
7171 }
7172
7173 static struct dentry *
7174 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7175                       void *data, long cpu, const struct file_operations *fops)
7176 {
7177         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7178
7179         if (ret) /* See tracing_get_cpu() */
7180                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7181         return ret;
7182 }
7183
7184 static void
7185 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7186 {
7187         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7188         struct dentry *d_cpu;
7189         char cpu_dir[30]; /* 30 characters should be more than enough */
7190
7191         if (!d_percpu)
7192                 return;
7193
7194         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7195         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7196         if (!d_cpu) {
7197                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7198                 return;
7199         }
7200
7201         /* per cpu trace_pipe */
7202         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7203                                 tr, cpu, &tracing_pipe_fops);
7204
7205         /* per cpu trace */
7206         trace_create_cpu_file("trace", 0644, d_cpu,
7207                                 tr, cpu, &tracing_fops);
7208
7209         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7210                                 tr, cpu, &tracing_buffers_fops);
7211
7212         trace_create_cpu_file("stats", 0444, d_cpu,
7213                                 tr, cpu, &tracing_stats_fops);
7214
7215         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7216                                 tr, cpu, &tracing_entries_fops);
7217
7218 #ifdef CONFIG_TRACER_SNAPSHOT
7219         trace_create_cpu_file("snapshot", 0644, d_cpu,
7220                                 tr, cpu, &snapshot_fops);
7221
7222         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7223                                 tr, cpu, &snapshot_raw_fops);
7224 #endif
7225 }
7226
7227 #ifdef CONFIG_FTRACE_SELFTEST
7228 /* Let selftest have access to static functions in this file */
7229 #include "trace_selftest.c"
7230 #endif
7231
7232 static ssize_t
7233 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7234                         loff_t *ppos)
7235 {
7236         struct trace_option_dentry *topt = filp->private_data;
7237         char *buf;
7238
7239         if (topt->flags->val & topt->opt->bit)
7240                 buf = "1\n";
7241         else
7242                 buf = "0\n";
7243
7244         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7245 }
7246
7247 static ssize_t
7248 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7249                          loff_t *ppos)
7250 {
7251         struct trace_option_dentry *topt = filp->private_data;
7252         unsigned long val;
7253         int ret;
7254
7255         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7256         if (ret)
7257                 return ret;
7258
7259         if (val != 0 && val != 1)
7260                 return -EINVAL;
7261
7262         if (!!(topt->flags->val & topt->opt->bit) != val) {
7263                 mutex_lock(&trace_types_lock);
7264                 ret = __set_tracer_option(topt->tr, topt->flags,
7265                                           topt->opt, !val);
7266                 mutex_unlock(&trace_types_lock);
7267                 if (ret)
7268                         return ret;
7269         }
7270
7271         *ppos += cnt;
7272
7273         return cnt;
7274 }
7275
7276
7277 static const struct file_operations trace_options_fops = {
7278         .open = tracing_open_generic,
7279         .read = trace_options_read,
7280         .write = trace_options_write,
7281         .llseek = generic_file_llseek,
7282 };
7283
7284 /*
7285  * In order to pass in both the trace_array descriptor as well as the index
7286  * to the flag that the trace option file represents, the trace_array
7287  * has a character array of trace_flags_index[], which holds the index
7288  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7289  * The address of this character array is passed to the flag option file
7290  * read/write callbacks.
7291  *
7292  * In order to extract both the index and the trace_array descriptor,
7293  * get_tr_index() uses the following algorithm.
7294  *
7295  *   idx = *ptr;
7296  *
7297  * As the pointer itself contains the address of the index (remember
7298  * index[1] == 1).
7299  *
7300  * Then to get the trace_array descriptor, by subtracting that index
7301  * from the ptr, we get to the start of the index itself.
7302  *
7303  *   ptr - idx == &index[0]
7304  *
7305  * Then a simple container_of() from that pointer gets us to the
7306  * trace_array descriptor.
7307  */
7308 static void get_tr_index(void *data, struct trace_array **ptr,
7309                          unsigned int *pindex)
7310 {
7311         *pindex = *(unsigned char *)data;
7312
7313         *ptr = container_of(data - *pindex, struct trace_array,
7314                             trace_flags_index);
7315 }
7316
7317 static ssize_t
7318 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7319                         loff_t *ppos)
7320 {
7321         void *tr_index = filp->private_data;
7322         struct trace_array *tr;
7323         unsigned int index;
7324         char *buf;
7325
7326         get_tr_index(tr_index, &tr, &index);
7327
7328         if (tr->trace_flags & (1 << index))
7329                 buf = "1\n";
7330         else
7331                 buf = "0\n";
7332
7333         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7334 }
7335
7336 static ssize_t
7337 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7338                          loff_t *ppos)
7339 {
7340         void *tr_index = filp->private_data;
7341         struct trace_array *tr;
7342         unsigned int index;
7343         unsigned long val;
7344         int ret;
7345
7346         get_tr_index(tr_index, &tr, &index);
7347
7348         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7349         if (ret)
7350                 return ret;
7351
7352         if (val != 0 && val != 1)
7353                 return -EINVAL;
7354
7355         mutex_lock(&event_mutex);
7356         mutex_lock(&trace_types_lock);
7357         ret = set_tracer_flag(tr, 1 << index, val);
7358         mutex_unlock(&trace_types_lock);
7359         mutex_unlock(&event_mutex);
7360
7361         if (ret < 0)
7362                 return ret;
7363
7364         *ppos += cnt;
7365
7366         return cnt;
7367 }
7368
7369 static const struct file_operations trace_options_core_fops = {
7370         .open = tracing_open_generic,
7371         .read = trace_options_core_read,
7372         .write = trace_options_core_write,
7373         .llseek = generic_file_llseek,
7374 };
7375
7376 struct dentry *trace_create_file(const char *name,
7377                                  umode_t mode,
7378                                  struct dentry *parent,
7379                                  void *data,
7380                                  const struct file_operations *fops)
7381 {
7382         struct dentry *ret;
7383
7384         ret = tracefs_create_file(name, mode, parent, data, fops);
7385         if (!ret)
7386                 pr_warn("Could not create tracefs '%s' entry\n", name);
7387
7388         return ret;
7389 }
7390
7391
7392 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7393 {
7394         struct dentry *d_tracer;
7395
7396         if (tr->options)
7397                 return tr->options;
7398
7399         d_tracer = tracing_get_dentry(tr);
7400         if (IS_ERR(d_tracer))
7401                 return NULL;
7402
7403         tr->options = tracefs_create_dir("options", d_tracer);
7404         if (!tr->options) {
7405                 pr_warn("Could not create tracefs directory 'options'\n");
7406                 return NULL;
7407         }
7408
7409         return tr->options;
7410 }
7411
7412 static void
7413 create_trace_option_file(struct trace_array *tr,
7414                          struct trace_option_dentry *topt,
7415                          struct tracer_flags *flags,
7416                          struct tracer_opt *opt)
7417 {
7418         struct dentry *t_options;
7419
7420         t_options = trace_options_init_dentry(tr);
7421         if (!t_options)
7422                 return;
7423
7424         topt->flags = flags;
7425         topt->opt = opt;
7426         topt->tr = tr;
7427
7428         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7429                                     &trace_options_fops);
7430
7431 }
7432
7433 static void
7434 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7435 {
7436         struct trace_option_dentry *topts;
7437         struct trace_options *tr_topts;
7438         struct tracer_flags *flags;
7439         struct tracer_opt *opts;
7440         int cnt;
7441         int i;
7442
7443         if (!tracer)
7444                 return;
7445
7446         flags = tracer->flags;
7447
7448         if (!flags || !flags->opts)
7449                 return;
7450
7451         /*
7452          * If this is an instance, only create flags for tracers
7453          * the instance may have.
7454          */
7455         if (!trace_ok_for_array(tracer, tr))
7456                 return;
7457
7458         for (i = 0; i < tr->nr_topts; i++) {
7459                 /* Make sure there's no duplicate flags. */
7460                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7461                         return;
7462         }
7463
7464         opts = flags->opts;
7465
7466         for (cnt = 0; opts[cnt].name; cnt++)
7467                 ;
7468
7469         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7470         if (!topts)
7471                 return;
7472
7473         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7474                             GFP_KERNEL);
7475         if (!tr_topts) {
7476                 kfree(topts);
7477                 return;
7478         }
7479
7480         tr->topts = tr_topts;
7481         tr->topts[tr->nr_topts].tracer = tracer;
7482         tr->topts[tr->nr_topts].topts = topts;
7483         tr->nr_topts++;
7484
7485         for (cnt = 0; opts[cnt].name; cnt++) {
7486                 create_trace_option_file(tr, &topts[cnt], flags,
7487                                          &opts[cnt]);
7488                 WARN_ONCE(topts[cnt].entry == NULL,
7489                           "Failed to create trace option: %s",
7490                           opts[cnt].name);
7491         }
7492 }
7493
7494 static struct dentry *
7495 create_trace_option_core_file(struct trace_array *tr,
7496                               const char *option, long index)
7497 {
7498         struct dentry *t_options;
7499
7500         t_options = trace_options_init_dentry(tr);
7501         if (!t_options)
7502                 return NULL;
7503
7504         return trace_create_file(option, 0644, t_options,
7505                                  (void *)&tr->trace_flags_index[index],
7506                                  &trace_options_core_fops);
7507 }
7508
7509 static void create_trace_options_dir(struct trace_array *tr)
7510 {
7511         struct dentry *t_options;
7512         bool top_level = tr == &global_trace;
7513         int i;
7514
7515         t_options = trace_options_init_dentry(tr);
7516         if (!t_options)
7517                 return;
7518
7519         for (i = 0; trace_options[i]; i++) {
7520                 if (top_level ||
7521                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7522                         create_trace_option_core_file(tr, trace_options[i], i);
7523         }
7524 }
7525
7526 static ssize_t
7527 rb_simple_read(struct file *filp, char __user *ubuf,
7528                size_t cnt, loff_t *ppos)
7529 {
7530         struct trace_array *tr = filp->private_data;
7531         char buf[64];
7532         int r;
7533
7534         r = tracer_tracing_is_on(tr);
7535         r = sprintf(buf, "%d\n", r);
7536
7537         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7538 }
7539
7540 static ssize_t
7541 rb_simple_write(struct file *filp, const char __user *ubuf,
7542                 size_t cnt, loff_t *ppos)
7543 {
7544         struct trace_array *tr = filp->private_data;
7545         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7546         unsigned long val;
7547         int ret;
7548
7549         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7550         if (ret)
7551                 return ret;
7552
7553         if (buffer) {
7554                 mutex_lock(&trace_types_lock);
7555                 if (!!val == tracer_tracing_is_on(tr)) {
7556                         val = 0; /* do nothing */
7557                 } else if (val) {
7558                         tracer_tracing_on(tr);
7559                         if (tr->current_trace->start)
7560                                 tr->current_trace->start(tr);
7561                 } else {
7562                         tracer_tracing_off(tr);
7563                         if (tr->current_trace->stop)
7564                                 tr->current_trace->stop(tr);
7565                 }
7566                 mutex_unlock(&trace_types_lock);
7567         }
7568
7569         (*ppos)++;
7570
7571         return cnt;
7572 }
7573
7574 static const struct file_operations rb_simple_fops = {
7575         .open           = tracing_open_generic_tr,
7576         .read           = rb_simple_read,
7577         .write          = rb_simple_write,
7578         .release        = tracing_release_generic_tr,
7579         .llseek         = default_llseek,
7580 };
7581
7582 struct dentry *trace_instance_dir;
7583
7584 static void
7585 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7586
7587 static int
7588 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7589 {
7590         enum ring_buffer_flags rb_flags;
7591
7592         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7593
7594         buf->tr = tr;
7595
7596         buf->buffer = ring_buffer_alloc(size, rb_flags);
7597         if (!buf->buffer)
7598                 return -ENOMEM;
7599
7600         buf->data = alloc_percpu(struct trace_array_cpu);
7601         if (!buf->data) {
7602                 ring_buffer_free(buf->buffer);
7603                 buf->buffer = NULL;
7604                 return -ENOMEM;
7605         }
7606
7607         /* Allocate the first page for all buffers */
7608         set_buffer_entries(&tr->trace_buffer,
7609                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7610
7611         return 0;
7612 }
7613
7614 static int allocate_trace_buffers(struct trace_array *tr, int size)
7615 {
7616         int ret;
7617
7618         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7619         if (ret)
7620                 return ret;
7621
7622 #ifdef CONFIG_TRACER_MAX_TRACE
7623         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7624                                     allocate_snapshot ? size : 1);
7625         if (WARN_ON(ret)) {
7626                 ring_buffer_free(tr->trace_buffer.buffer);
7627                 tr->trace_buffer.buffer = NULL;
7628                 free_percpu(tr->trace_buffer.data);
7629                 tr->trace_buffer.data = NULL;
7630                 return -ENOMEM;
7631         }
7632         tr->allocated_snapshot = allocate_snapshot;
7633
7634         /*
7635          * Only the top level trace array gets its snapshot allocated
7636          * from the kernel command line.
7637          */
7638         allocate_snapshot = false;
7639 #endif
7640
7641         /*
7642          * Because of some magic with the way alloc_percpu() works on
7643          * x86_64, we need to synchronize the pgd of all the tables,
7644          * otherwise the trace events that happen in x86_64 page fault
7645          * handlers can't cope with accessing the chance that a
7646          * alloc_percpu()'d memory might be touched in the page fault trace
7647          * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
7648          * calls in tracing, because something might get triggered within a
7649          * page fault trace event!
7650          */
7651         vmalloc_sync_mappings();
7652
7653         return 0;
7654 }
7655
7656 static void free_trace_buffer(struct trace_buffer *buf)
7657 {
7658         if (buf->buffer) {
7659                 ring_buffer_free(buf->buffer);
7660                 buf->buffer = NULL;
7661                 free_percpu(buf->data);
7662                 buf->data = NULL;
7663         }
7664 }
7665
7666 static void free_trace_buffers(struct trace_array *tr)
7667 {
7668         if (!tr)
7669                 return;
7670
7671         free_trace_buffer(&tr->trace_buffer);
7672
7673 #ifdef CONFIG_TRACER_MAX_TRACE
7674         free_trace_buffer(&tr->max_buffer);
7675 #endif
7676 }
7677
7678 static void init_trace_flags_index(struct trace_array *tr)
7679 {
7680         int i;
7681
7682         /* Used by the trace options files */
7683         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7684                 tr->trace_flags_index[i] = i;
7685 }
7686
7687 static void __update_tracer_options(struct trace_array *tr)
7688 {
7689         struct tracer *t;
7690
7691         for (t = trace_types; t; t = t->next)
7692                 add_tracer_options(tr, t);
7693 }
7694
7695 static void update_tracer_options(struct trace_array *tr)
7696 {
7697         mutex_lock(&trace_types_lock);
7698         __update_tracer_options(tr);
7699         mutex_unlock(&trace_types_lock);
7700 }
7701
7702 static int instance_mkdir(const char *name)
7703 {
7704         struct trace_array *tr;
7705         int ret;
7706
7707         mutex_lock(&event_mutex);
7708         mutex_lock(&trace_types_lock);
7709
7710         ret = -EEXIST;
7711         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7712                 if (tr->name && strcmp(tr->name, name) == 0)
7713                         goto out_unlock;
7714         }
7715
7716         ret = -ENOMEM;
7717         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7718         if (!tr)
7719                 goto out_unlock;
7720
7721         tr->name = kstrdup(name, GFP_KERNEL);
7722         if (!tr->name)
7723                 goto out_free_tr;
7724
7725         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7726                 goto out_free_tr;
7727
7728         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7729
7730         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7731
7732         raw_spin_lock_init(&tr->start_lock);
7733
7734         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7735
7736         tr->current_trace = &nop_trace;
7737
7738         INIT_LIST_HEAD(&tr->systems);
7739         INIT_LIST_HEAD(&tr->events);
7740
7741         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7742                 goto out_free_tr;
7743
7744         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7745         if (!tr->dir)
7746                 goto out_free_tr;
7747
7748         ret = event_trace_add_tracer(tr->dir, tr);
7749         if (ret) {
7750                 tracefs_remove_recursive(tr->dir);
7751                 goto out_free_tr;
7752         }
7753
7754         ftrace_init_trace_array(tr);
7755
7756         init_tracer_tracefs(tr, tr->dir);
7757         init_trace_flags_index(tr);
7758         __update_tracer_options(tr);
7759
7760         list_add(&tr->list, &ftrace_trace_arrays);
7761
7762         mutex_unlock(&trace_types_lock);
7763         mutex_unlock(&event_mutex);
7764
7765         return 0;
7766
7767  out_free_tr:
7768         free_trace_buffers(tr);
7769         free_cpumask_var(tr->tracing_cpumask);
7770         kfree(tr->name);
7771         kfree(tr);
7772
7773  out_unlock:
7774         mutex_unlock(&trace_types_lock);
7775         mutex_unlock(&event_mutex);
7776
7777         return ret;
7778
7779 }
7780
7781 static int instance_rmdir(const char *name)
7782 {
7783         struct trace_array *tr;
7784         int found = 0;
7785         int ret;
7786         int i;
7787
7788         mutex_lock(&event_mutex);
7789         mutex_lock(&trace_types_lock);
7790
7791         ret = -ENODEV;
7792         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7793                 if (tr->name && strcmp(tr->name, name) == 0) {
7794                         found = 1;
7795                         break;
7796                 }
7797         }
7798         if (!found)
7799                 goto out_unlock;
7800
7801         ret = -EBUSY;
7802         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7803                 goto out_unlock;
7804
7805         list_del(&tr->list);
7806
7807         /* Disable all the flags that were enabled coming in */
7808         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7809                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7810                         set_tracer_flag(tr, 1 << i, 0);
7811         }
7812
7813         tracing_set_nop(tr);
7814         clear_ftrace_function_probes(tr);
7815         event_trace_del_tracer(tr);
7816         ftrace_clear_pids(tr);
7817         ftrace_destroy_function_files(tr);
7818         tracefs_remove_recursive(tr->dir);
7819         free_trace_buffers(tr);
7820
7821         for (i = 0; i < tr->nr_topts; i++) {
7822                 kfree(tr->topts[i].topts);
7823         }
7824         kfree(tr->topts);
7825
7826         free_cpumask_var(tr->tracing_cpumask);
7827         kfree(tr->name);
7828         kfree(tr);
7829
7830         ret = 0;
7831
7832  out_unlock:
7833         mutex_unlock(&trace_types_lock);
7834         mutex_unlock(&event_mutex);
7835
7836         return ret;
7837 }
7838
7839 static __init void create_trace_instances(struct dentry *d_tracer)
7840 {
7841         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7842                                                          instance_mkdir,
7843                                                          instance_rmdir);
7844         if (WARN_ON(!trace_instance_dir))
7845                 return;
7846 }
7847
7848 static void
7849 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7850 {
7851         int cpu;
7852
7853         trace_create_file("available_tracers", 0444, d_tracer,
7854                         tr, &show_traces_fops);
7855
7856         trace_create_file("current_tracer", 0644, d_tracer,
7857                         tr, &set_tracer_fops);
7858
7859         trace_create_file("tracing_cpumask", 0644, d_tracer,
7860                           tr, &tracing_cpumask_fops);
7861
7862         trace_create_file("trace_options", 0644, d_tracer,
7863                           tr, &tracing_iter_fops);
7864
7865         trace_create_file("trace", 0644, d_tracer,
7866                           tr, &tracing_fops);
7867
7868         trace_create_file("trace_pipe", 0444, d_tracer,
7869                           tr, &tracing_pipe_fops);
7870
7871         trace_create_file("buffer_size_kb", 0644, d_tracer,
7872                           tr, &tracing_entries_fops);
7873
7874         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7875                           tr, &tracing_total_entries_fops);
7876
7877         trace_create_file("free_buffer", 0200, d_tracer,
7878                           tr, &tracing_free_buffer_fops);
7879
7880         trace_create_file("trace_marker", 0220, d_tracer,
7881                           tr, &tracing_mark_fops);
7882
7883         trace_create_file("trace_marker_raw", 0220, d_tracer,
7884                           tr, &tracing_mark_raw_fops);
7885
7886         trace_create_file("trace_clock", 0644, d_tracer, tr,
7887                           &trace_clock_fops);
7888
7889         trace_create_file("tracing_on", 0644, d_tracer,
7890                           tr, &rb_simple_fops);
7891
7892         create_trace_options_dir(tr);
7893
7894 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7895         trace_create_file("tracing_max_latency", 0644, d_tracer,
7896                         &tr->max_latency, &tracing_max_lat_fops);
7897 #endif
7898
7899         if (ftrace_create_function_files(tr, d_tracer))
7900                 WARN(1, "Could not allocate function filter files");
7901
7902 #ifdef CONFIG_TRACER_SNAPSHOT
7903         trace_create_file("snapshot", 0644, d_tracer,
7904                           tr, &snapshot_fops);
7905 #endif
7906
7907         for_each_tracing_cpu(cpu)
7908                 tracing_init_tracefs_percpu(tr, cpu);
7909
7910         ftrace_init_tracefs(tr, d_tracer);
7911 }
7912
7913 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7914 {
7915         struct vfsmount *mnt;
7916         struct file_system_type *type;
7917
7918         /*
7919          * To maintain backward compatibility for tools that mount
7920          * debugfs to get to the tracing facility, tracefs is automatically
7921          * mounted to the debugfs/tracing directory.
7922          */
7923         type = get_fs_type("tracefs");
7924         if (!type)
7925                 return NULL;
7926         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7927         put_filesystem(type);
7928         if (IS_ERR(mnt))
7929                 return NULL;
7930         mntget(mnt);
7931
7932         return mnt;
7933 }
7934
7935 /**
7936  * tracing_init_dentry - initialize top level trace array
7937  *
7938  * This is called when creating files or directories in the tracing
7939  * directory. It is called via fs_initcall() by any of the boot up code
7940  * and expects to return the dentry of the top level tracing directory.
7941  */
7942 struct dentry *tracing_init_dentry(void)
7943 {
7944         struct trace_array *tr = &global_trace;
7945
7946         /* The top level trace array uses  NULL as parent */
7947         if (tr->dir)
7948                 return NULL;
7949
7950         if (WARN_ON(!tracefs_initialized()) ||
7951                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7952                  WARN_ON(!debugfs_initialized())))
7953                 return ERR_PTR(-ENODEV);
7954
7955         /*
7956          * As there may still be users that expect the tracing
7957          * files to exist in debugfs/tracing, we must automount
7958          * the tracefs file system there, so older tools still
7959          * work with the newer kerenl.
7960          */
7961         tr->dir = debugfs_create_automount("tracing", NULL,
7962                                            trace_automount, NULL);
7963         if (!tr->dir) {
7964                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7965                 return ERR_PTR(-ENOMEM);
7966         }
7967
7968         return NULL;
7969 }
7970
7971 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7972 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7973
7974 static void __init trace_eval_init(void)
7975 {
7976         int len;
7977
7978         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7979         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7980 }
7981
7982 #ifdef CONFIG_MODULES
7983 static void trace_module_add_evals(struct module *mod)
7984 {
7985         if (!mod->num_trace_evals)
7986                 return;
7987
7988         /*
7989          * Modules with bad taint do not have events created, do
7990          * not bother with enums either.
7991          */
7992         if (trace_module_has_bad_taint(mod))
7993                 return;
7994
7995         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7996 }
7997
7998 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7999 static void trace_module_remove_evals(struct module *mod)
8000 {
8001         union trace_eval_map_item *map;
8002         union trace_eval_map_item **last = &trace_eval_maps;
8003
8004         if (!mod->num_trace_evals)
8005                 return;
8006
8007         mutex_lock(&trace_eval_mutex);
8008
8009         map = trace_eval_maps;
8010
8011         while (map) {
8012                 if (map->head.mod == mod)
8013                         break;
8014                 map = trace_eval_jmp_to_tail(map);
8015                 last = &map->tail.next;
8016                 map = map->tail.next;
8017         }
8018         if (!map)
8019                 goto out;
8020
8021         *last = trace_eval_jmp_to_tail(map)->tail.next;
8022         kfree(map);
8023  out:
8024         mutex_unlock(&trace_eval_mutex);
8025 }
8026 #else
8027 static inline void trace_module_remove_evals(struct module *mod) { }
8028 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8029
8030 static int trace_module_notify(struct notifier_block *self,
8031                                unsigned long val, void *data)
8032 {
8033         struct module *mod = data;
8034
8035         switch (val) {
8036         case MODULE_STATE_COMING:
8037                 trace_module_add_evals(mod);
8038                 break;
8039         case MODULE_STATE_GOING:
8040                 trace_module_remove_evals(mod);
8041                 break;
8042         }
8043
8044         return 0;
8045 }
8046
8047 static struct notifier_block trace_module_nb = {
8048         .notifier_call = trace_module_notify,
8049         .priority = 0,
8050 };
8051 #endif /* CONFIG_MODULES */
8052
8053 static __init int tracer_init_tracefs(void)
8054 {
8055         struct dentry *d_tracer;
8056
8057         trace_access_lock_init();
8058
8059         d_tracer = tracing_init_dentry();
8060         if (IS_ERR(d_tracer))
8061                 return 0;
8062
8063         init_tracer_tracefs(&global_trace, d_tracer);
8064         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8065
8066         trace_create_file("tracing_thresh", 0644, d_tracer,
8067                         &global_trace, &tracing_thresh_fops);
8068
8069         trace_create_file("README", 0444, d_tracer,
8070                         NULL, &tracing_readme_fops);
8071
8072         trace_create_file("saved_cmdlines", 0444, d_tracer,
8073                         NULL, &tracing_saved_cmdlines_fops);
8074
8075         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8076                           NULL, &tracing_saved_cmdlines_size_fops);
8077
8078         trace_create_file("saved_tgids", 0444, d_tracer,
8079                         NULL, &tracing_saved_tgids_fops);
8080
8081         trace_eval_init();
8082
8083         trace_create_eval_file(d_tracer);
8084
8085 #ifdef CONFIG_MODULES
8086         register_module_notifier(&trace_module_nb);
8087 #endif
8088
8089 #ifdef CONFIG_DYNAMIC_FTRACE
8090         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8091                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8092 #endif
8093
8094         create_trace_instances(d_tracer);
8095
8096         update_tracer_options(&global_trace);
8097
8098         return 0;
8099 }
8100
8101 static int trace_panic_handler(struct notifier_block *this,
8102                                unsigned long event, void *unused)
8103 {
8104         if (ftrace_dump_on_oops)
8105                 ftrace_dump(ftrace_dump_on_oops);
8106         return NOTIFY_OK;
8107 }
8108
8109 static struct notifier_block trace_panic_notifier = {
8110         .notifier_call  = trace_panic_handler,
8111         .next           = NULL,
8112         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8113 };
8114
8115 static int trace_die_handler(struct notifier_block *self,
8116                              unsigned long val,
8117                              void *data)
8118 {
8119         switch (val) {
8120         case DIE_OOPS:
8121                 if (ftrace_dump_on_oops)
8122                         ftrace_dump(ftrace_dump_on_oops);
8123                 break;
8124         default:
8125                 break;
8126         }
8127         return NOTIFY_OK;
8128 }
8129
8130 static struct notifier_block trace_die_notifier = {
8131         .notifier_call = trace_die_handler,
8132         .priority = 200
8133 };
8134
8135 /*
8136  * printk is set to max of 1024, we really don't need it that big.
8137  * Nothing should be printing 1000 characters anyway.
8138  */
8139 #define TRACE_MAX_PRINT         1000
8140
8141 /*
8142  * Define here KERN_TRACE so that we have one place to modify
8143  * it if we decide to change what log level the ftrace dump
8144  * should be at.
8145  */
8146 #define KERN_TRACE              KERN_EMERG
8147
8148 void
8149 trace_printk_seq(struct trace_seq *s)
8150 {
8151         /* Probably should print a warning here. */
8152         if (s->seq.len >= TRACE_MAX_PRINT)
8153                 s->seq.len = TRACE_MAX_PRINT;
8154
8155         /*
8156          * More paranoid code. Although the buffer size is set to
8157          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8158          * an extra layer of protection.
8159          */
8160         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8161                 s->seq.len = s->seq.size - 1;
8162
8163         /* should be zero ended, but we are paranoid. */
8164         s->buffer[s->seq.len] = 0;
8165
8166         printk(KERN_TRACE "%s", s->buffer);
8167
8168         trace_seq_init(s);
8169 }
8170
8171 void trace_init_global_iter(struct trace_iterator *iter)
8172 {
8173         iter->tr = &global_trace;
8174         iter->trace = iter->tr->current_trace;
8175         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8176         iter->trace_buffer = &global_trace.trace_buffer;
8177
8178         if (iter->trace && iter->trace->open)
8179                 iter->trace->open(iter);
8180
8181         /* Annotate start of buffers if we had overruns */
8182         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8183                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8184
8185         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8186         if (trace_clocks[iter->tr->clock_id].in_ns)
8187                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8188 }
8189
8190 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8191 {
8192         /* use static because iter can be a bit big for the stack */
8193         static struct trace_iterator iter;
8194         static atomic_t dump_running;
8195         struct trace_array *tr = &global_trace;
8196         unsigned int old_userobj;
8197         unsigned long flags;
8198         int cnt = 0, cpu;
8199
8200         /* Only allow one dump user at a time. */
8201         if (atomic_inc_return(&dump_running) != 1) {
8202                 atomic_dec(&dump_running);
8203                 return;
8204         }
8205
8206         /*
8207          * Always turn off tracing when we dump.
8208          * We don't need to show trace output of what happens
8209          * between multiple crashes.
8210          *
8211          * If the user does a sysrq-z, then they can re-enable
8212          * tracing with echo 1 > tracing_on.
8213          */
8214         tracing_off();
8215
8216         local_irq_save(flags);
8217         printk_nmi_direct_enter();
8218
8219         /* Simulate the iterator */
8220         trace_init_global_iter(&iter);
8221
8222         for_each_tracing_cpu(cpu) {
8223                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8224         }
8225
8226         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8227
8228         /* don't look at user memory in panic mode */
8229         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8230
8231         switch (oops_dump_mode) {
8232         case DUMP_ALL:
8233                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8234                 break;
8235         case DUMP_ORIG:
8236                 iter.cpu_file = raw_smp_processor_id();
8237                 break;
8238         case DUMP_NONE:
8239                 goto out_enable;
8240         default:
8241                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8242                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8243         }
8244
8245         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8246
8247         /* Did function tracer already get disabled? */
8248         if (ftrace_is_dead()) {
8249                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8250                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8251         }
8252
8253         /*
8254          * We need to stop all tracing on all CPUS to read the
8255          * the next buffer. This is a bit expensive, but is
8256          * not done often. We fill all what we can read,
8257          * and then release the locks again.
8258          */
8259
8260         while (!trace_empty(&iter)) {
8261
8262                 if (!cnt)
8263                         printk(KERN_TRACE "---------------------------------\n");
8264
8265                 cnt++;
8266
8267                 trace_iterator_reset(&iter);
8268                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8269
8270                 if (trace_find_next_entry_inc(&iter) != NULL) {
8271                         int ret;
8272
8273                         ret = print_trace_line(&iter);
8274                         if (ret != TRACE_TYPE_NO_CONSUME)
8275                                 trace_consume(&iter);
8276                 }
8277                 touch_nmi_watchdog();
8278
8279                 trace_printk_seq(&iter.seq);
8280         }
8281
8282         if (!cnt)
8283                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8284         else
8285                 printk(KERN_TRACE "---------------------------------\n");
8286
8287  out_enable:
8288         tr->trace_flags |= old_userobj;
8289
8290         for_each_tracing_cpu(cpu) {
8291                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8292         }
8293         atomic_dec(&dump_running);
8294         printk_nmi_direct_exit();
8295         local_irq_restore(flags);
8296 }
8297 EXPORT_SYMBOL_GPL(ftrace_dump);
8298
8299 __init static int tracer_alloc_buffers(void)
8300 {
8301         int ring_buf_size;
8302         int ret = -ENOMEM;
8303
8304         /*
8305          * Make sure we don't accidently add more trace options
8306          * than we have bits for.
8307          */
8308         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8309
8310         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8311                 goto out;
8312
8313         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8314                 goto out_free_buffer_mask;
8315
8316         /* Only allocate trace_printk buffers if a trace_printk exists */
8317         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8318                 /* Must be called before global_trace.buffer is allocated */
8319                 trace_printk_init_buffers();
8320
8321         /* To save memory, keep the ring buffer size to its minimum */
8322         if (ring_buffer_expanded)
8323                 ring_buf_size = trace_buf_size;
8324         else
8325                 ring_buf_size = 1;
8326
8327         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8328         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8329
8330         raw_spin_lock_init(&global_trace.start_lock);
8331
8332         /*
8333          * The prepare callbacks allocates some memory for the ring buffer. We
8334          * don't free the buffer if the if the CPU goes down. If we were to free
8335          * the buffer, then the user would lose any trace that was in the
8336          * buffer. The memory will be removed once the "instance" is removed.
8337          */
8338         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8339                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8340                                       NULL);
8341         if (ret < 0)
8342                 goto out_free_cpumask;
8343         /* Used for event triggers */
8344         ret = -ENOMEM;
8345         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8346         if (!temp_buffer)
8347                 goto out_rm_hp_state;
8348
8349         if (trace_create_savedcmd() < 0)
8350                 goto out_free_temp_buffer;
8351
8352         /* TODO: make the number of buffers hot pluggable with CPUS */
8353         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8354                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8355                 WARN_ON(1);
8356                 goto out_free_savedcmd;
8357         }
8358
8359         if (global_trace.buffer_disabled)
8360                 tracing_off();
8361
8362         if (trace_boot_clock) {
8363                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8364                 if (ret < 0)
8365                         pr_warn("Trace clock %s not defined, going back to default\n",
8366                                 trace_boot_clock);
8367         }
8368
8369         /*
8370          * register_tracer() might reference current_trace, so it
8371          * needs to be set before we register anything. This is
8372          * just a bootstrap of current_trace anyway.
8373          */
8374         global_trace.current_trace = &nop_trace;
8375
8376         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8377
8378         ftrace_init_global_array_ops(&global_trace);
8379
8380         init_trace_flags_index(&global_trace);
8381
8382         register_tracer(&nop_trace);
8383
8384         /* Function tracing may start here (via kernel command line) */
8385         init_function_trace();
8386
8387         /* All seems OK, enable tracing */
8388         tracing_disabled = 0;
8389
8390         atomic_notifier_chain_register(&panic_notifier_list,
8391                                        &trace_panic_notifier);
8392
8393         register_die_notifier(&trace_die_notifier);
8394
8395         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8396
8397         INIT_LIST_HEAD(&global_trace.systems);
8398         INIT_LIST_HEAD(&global_trace.events);
8399         list_add(&global_trace.list, &ftrace_trace_arrays);
8400
8401         apply_trace_boot_options();
8402
8403         register_snapshot_cmd();
8404
8405         return 0;
8406
8407 out_free_savedcmd:
8408         free_saved_cmdlines_buffer(savedcmd);
8409 out_free_temp_buffer:
8410         ring_buffer_free(temp_buffer);
8411 out_rm_hp_state:
8412         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8413 out_free_cpumask:
8414         free_cpumask_var(global_trace.tracing_cpumask);
8415 out_free_buffer_mask:
8416         free_cpumask_var(tracing_buffer_mask);
8417 out:
8418         return ret;
8419 }
8420
8421 void __init early_trace_init(void)
8422 {
8423         if (tracepoint_printk) {
8424                 tracepoint_print_iter =
8425                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8426                 if (WARN_ON(!tracepoint_print_iter))
8427                         tracepoint_printk = 0;
8428                 else
8429                         static_key_enable(&tracepoint_printk_key.key);
8430         }
8431         tracer_alloc_buffers();
8432 }
8433
8434 void __init trace_init(void)
8435 {
8436         trace_event_init();
8437 }
8438
8439 __init static int clear_boot_tracer(void)
8440 {
8441         /*
8442          * The default tracer at boot buffer is an init section.
8443          * This function is called in lateinit. If we did not
8444          * find the boot tracer, then clear it out, to prevent
8445          * later registration from accessing the buffer that is
8446          * about to be freed.
8447          */
8448         if (!default_bootup_tracer)
8449                 return 0;
8450
8451         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8452                default_bootup_tracer);
8453         default_bootup_tracer = NULL;
8454
8455         return 0;
8456 }
8457
8458 fs_initcall(tracer_init_tracefs);
8459 late_initcall_sync(clear_boot_tracer);