1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 and is set back to zero only when the
95  * initialization of the tracer is successful. That is the only
96  * place that clears it.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * from "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
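/*
 * Illustrative layout (a sketch based on the comment above, not part of
 * the original source): for a module that registers N eval maps, the
 * saved array looks roughly like:
 *
 *   item[0].head       = { .mod = owning module, .length = N }
 *   item[1 .. N].map   = the N trace_eval_map entries
 *   item[N + 1].tail   = { .next = next saved array or NULL, .end = NULL }
 */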
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
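/*
 * Example (derived from the parser above): on the kernel command line,
 *   ftrace_dump_on_oops            selects DUMP_ALL (dump every CPU buffer)
 *   ftrace_dump_on_oops=orig_cpu   selects DUMP_ORIG (only the oopsing CPU)
 * Any other "=value" is rejected by the parser and leaves dumping disabled.
 */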
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         /* Ignore the "tp_printk_stop_on_boot" param */
234         if (*str == '_')
235                 return 0;
236
237         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238                 tracepoint_printk = 1;
239         return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242
243 unsigned long long ns2usecs(u64 nsec)
244 {
245         nsec += 500;
246         do_div(nsec, 1000);
247         return nsec;
248 }
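/*
 * ns2usecs() rounds to the nearest microsecond by adding half a
 * microsecond before dividing. Worked examples (illustrative):
 *   ns2usecs(1499) == (1499 + 500) / 1000 == 1
 *   ns2usecs(1500) == (1500 + 500) / 1000 == 2
 */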
249
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS                                             \
252         (FUNCTION_DEFAULT_FLAGS |                                       \
253          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
254          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
255          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
256          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
260                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271         .trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273
274 LIST_HEAD(ftrace_trace_arrays);
275
276 int trace_array_get(struct trace_array *this_tr)
277 {
278         struct trace_array *tr;
279         int ret = -ENODEV;
280
281         mutex_lock(&trace_types_lock);
282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283                 if (tr == this_tr) {
284                         tr->ref++;
285                         ret = 0;
286                         break;
287                 }
288         }
289         mutex_unlock(&trace_types_lock);
290
291         return ret;
292 }
293
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296         WARN_ON(!this_tr->ref);
297         this_tr->ref--;
298 }
299
300 void trace_array_put(struct trace_array *this_tr)
301 {
302         mutex_lock(&trace_types_lock);
303         __trace_array_put(this_tr);
304         mutex_unlock(&trace_types_lock);
305 }
306
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308                               struct ring_buffer *buffer,
309                               struct ring_buffer_event *event)
310 {
311         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312             !filter_match_preds(call->filter, rec)) {
313                 __trace_event_discard_commit(buffer, event);
314                 return 1;
315         }
316
317         return 0;
318 }
319
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322         vfree(pid_list->pids);
323         kfree(pid_list);
324 }
325
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336         /*
337          * If pid_max changed after filtered_pids was created, we
338          * by default ignore all pids greater than the previous pid_max.
339          */
340         if (search_pid >= filtered_pids->pid_max)
341                 return false;
342
343         return test_bit(search_pid, filtered_pids->pids);
344 }
345
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358         /*
359          * Return false, because if filtered_pids does not exist,
360          * all pids are good to trace.
361          */
362         if (!filtered_pids)
363                 return false;
364
365         return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * When adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork, and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on the exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381                                   struct task_struct *self,
382                                   struct task_struct *task)
383 {
384         if (!pid_list)
385                 return;
386
387         /* For forks, we only add if the forking task is listed */
388         if (self) {
389                 if (!trace_find_filtered_pid(pid_list, self->pid))
390                         return;
391         }
392
393         /* Sorry, but we don't support pid_max changing after setting */
394         if (task->pid >= pid_list->pid_max)
395                 return;
396
397         /* "self" is set for forks, and NULL for exits */
398         if (self)
399                 set_bit(task->pid, pid_list->pids);
400         else
401                 clear_bit(task->pid, pid_list->pids);
402 }
403
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418         unsigned long pid = (unsigned long)v;
419
420         (*pos)++;
421
422         /* pid is already +1 of the actual previous bit */
423         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424
425         /* Return pid + 1 to allow zero to be represented */
426         if (pid < pid_list->pid_max)
427                 return (void *)(pid + 1);
428
429         return NULL;
430 }
431
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445         unsigned long pid;
446         loff_t l = 0;
447
448         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449         if (pid >= pid_list->pid_max)
450                 return NULL;
451
452         /* Return pid + 1 so that zero can be the exit value */
453         for (pid++; pid && l < *pos;
454              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455                 ;
456         return (void *)pid;
457 }
458
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469         unsigned long pid = (unsigned long)v - 1;
470
471         seq_printf(m, "%lu\n", pid);
472         return 0;
473 }
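/*
 * Illustrative seq_file wiring (a sketch; "pid_list", p_start, p_next and
 * p_show are hypothetical names, the real users live in the event and
 * function pid filtering code). Every value is carried as pid + 1 so that
 * pid 0 is not confused with the NULL that terminates the iteration:
 *
 *   static void *p_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(pid_list, pos);    returns (void *)(pid + 1)
 *   }
 *
 *   static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(pid_list, v, pos);
 *   }
 *
 *   static int p_show(struct seq_file *m, void *v)
 *   {
 *           return trace_pid_show(m, v);              prints (unsigned long)v - 1
 *   }
 */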
474
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE            127
477
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479                     struct trace_pid_list **new_pid_list,
480                     const char __user *ubuf, size_t cnt)
481 {
482         struct trace_pid_list *pid_list;
483         struct trace_parser parser;
484         unsigned long val;
485         int nr_pids = 0;
486         ssize_t read = 0;
487         ssize_t ret = 0;
488         loff_t pos;
489         pid_t pid;
490
491         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492                 return -ENOMEM;
493
494         /*
495          * Always create a new array. The write is an all-or-nothing
496          * operation: when the user adds new pids, a new array is built,
497          * and if the operation fails, the current list is left
498          * unmodified.
499          */
500         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501         if (!pid_list) {
502                 trace_parser_put(&parser);
503                 return -ENOMEM;
504         }
505
506         pid_list->pid_max = READ_ONCE(pid_max);
507
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 trace_parser_put(&parser);
515                 kfree(pid_list);
516                 return -ENOMEM;
517         }
518
519         if (filtered_pids) {
520                 /* copy the current bits to the new max */
521                 for_each_set_bit(pid, filtered_pids->pids,
522                                  filtered_pids->pid_max) {
523                         set_bit(pid, pid_list->pids);
524                         nr_pids++;
525                 }
526         }
527
528         while (cnt > 0) {
529
530                 pos = 0;
531
532                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
533                 if (ret < 0 || !trace_parser_loaded(&parser))
534                         break;
535
536                 read += ret;
537                 ubuf += ret;
538                 cnt -= ret;
539
540                 parser.buffer[parser.idx] = 0;
541
542                 ret = -EINVAL;
543                 if (kstrtoul(parser.buffer, 0, &val))
544                         break;
545                 if (val >= pid_list->pid_max)
546                         break;
547
548                 pid = (pid_t)val;
549
550                 set_bit(pid, pid_list->pids);
551                 nr_pids++;
552
553                 trace_parser_clear(&parser);
554                 ret = 0;
555         }
556         trace_parser_put(&parser);
557
558         if (ret < 0) {
559                 trace_free_pid_list(pid_list);
560                 return ret;
561         }
562
563         if (!nr_pids) {
564                 /* Cleared the list of pids */
565                 trace_free_pid_list(pid_list);
566                 read = ret;
567                 pid_list = NULL;
568         }
569
570         *new_pid_list = pid_list;
571
572         return read;
573 }
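/*
 * Illustrative caller pattern (a sketch; "old_list" and "tr_pid_list" are
 * hypothetical names, the real callers are the event and function pid
 * filtering code). The write is all-or-nothing: a new list comes back and
 * is published, and the old one is freed only after a grace period:
 *
 *   struct trace_pid_list *new_list = NULL;
 *   ssize_t ret;
 *
 *   ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
 *   if (ret < 0)
 *           return ret;
 *
 *   rcu_assign_pointer(tr_pid_list, new_list);
 *   if (old_list) {
 *           synchronize_sched();
 *           trace_free_pid_list(old_list);
 *   }
 */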
574
575 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
576 {
577         u64 ts;
578
579         /* Early boot up does not have a buffer yet */
580         if (!buf->buffer)
581                 return trace_clock_local();
582
583         ts = ring_buffer_time_stamp(buf->buffer, cpu);
584         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
585
586         return ts;
587 }
588
589 u64 ftrace_now(int cpu)
590 {
591         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
592 }
593
594 /**
595  * tracing_is_enabled - Show if global_trace has been disabled
596  *
597  * Shows if the global trace has been enabled or not. It uses the
598  * mirror flag "buffer_disabled" so it can be used in fast paths such
599  * as the irqsoff tracer, but it may be inaccurate due to races. If you
600  * need to know the accurate state, use tracing_is_on(), which is a
601  * little slower but accurate.
602  */
603 int tracing_is_enabled(void)
604 {
605         /*
606          * For quick access (irqsoff uses this in fast path), just
607          * return the mirror variable of the state of the ring buffer.
608          * It's a little racy, but we don't really care.
609          */
610         smp_rmb();
611         return !global_trace.buffer_disabled;
612 }
613
614 /*
615  * trace_buf_size is the size in bytes that is allocated
616  * for a buffer. Note, the number of bytes is always rounded
617  * to page size.
618  *
619  * This number is purposely set to the low value of 16384 entries.
620  * If a dump on oops happens, it is much appreciated not to have to
621  * wait for all that output. In any case, this is configurable at
622  * both boot time and run time.
623  */
624 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
625
626 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
627
628 /* trace_types holds a link list of available tracers. */
629 static struct tracer            *trace_types __read_mostly;
630
631 /*
632  * trace_types_lock is used to protect the trace_types list.
633  */
634 DEFINE_MUTEX(trace_types_lock);
635
636 /*
637  * Serialize access to the ring buffer.
638  *
639  * The ring buffer serializes readers, but that is only low level protection.
640  * The validity of the events (returned by ring_buffer_peek() etc.)
641  * is not protected by the ring buffer.
642  *
643  * The content of events may become garbage if we allow other processes
644  * to consume these events concurrently:
645  *   A) the page of the consumed events may become a normal page
646  *      (not a reader page) in the ring buffer, and this page will be
647  *      rewritten by the event producer.
648  *   B) The page of the consumed events may become a page for splice_read,
649  *      and this page will be returned to the system.
650  *
651  * These primitives allow multiple processes to access different per-cpu
652  * ring buffers concurrently.
653  *
654  * These primitives don't distinguish read-only and read-consume access.
655  * Multiple read-only accesses are also serialized.
656  */
657
658 #ifdef CONFIG_SMP
659 static DECLARE_RWSEM(all_cpu_access_lock);
660 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
661
662 static inline void trace_access_lock(int cpu)
663 {
664         if (cpu == RING_BUFFER_ALL_CPUS) {
665                 /* gain it for accessing the whole ring buffer. */
666                 down_write(&all_cpu_access_lock);
667         } else {
668                 /* gain it for accessing a cpu ring buffer. */
669
670                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
671                 down_read(&all_cpu_access_lock);
672
673                 /* Secondly block other access to this @cpu ring buffer. */
674                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
675         }
676 }
677
678 static inline void trace_access_unlock(int cpu)
679 {
680         if (cpu == RING_BUFFER_ALL_CPUS) {
681                 up_write(&all_cpu_access_lock);
682         } else {
683                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
684                 up_read(&all_cpu_access_lock);
685         }
686 }
687
688 static inline void trace_access_lock_init(void)
689 {
690         int cpu;
691
692         for_each_possible_cpu(cpu)
693                 mutex_init(&per_cpu(cpu_access_lock, cpu));
694 }
695
696 #else
697
698 static DEFINE_MUTEX(access_lock);
699
700 static inline void trace_access_lock(int cpu)
701 {
702         (void)cpu;
703         mutex_lock(&access_lock);
704 }
705
706 static inline void trace_access_unlock(int cpu)
707 {
708         (void)cpu;
709         mutex_unlock(&access_lock);
710 }
711
712 static inline void trace_access_lock_init(void)
713 {
714 }
715
716 #endif
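/*
 * Illustrative usage pattern (a sketch): a reader that only touches one
 * CPU's buffer takes the per-cpu mutex (plus the rwsem for reading),
 * while an operation on every buffer passes RING_BUFFER_ALL_CPUS and
 * takes the rwsem for writing. Either way the call pattern is:
 *
 *   trace_access_lock(cpu_file);
 *   ... consume or peek at events for cpu_file ...
 *   trace_access_unlock(cpu_file);
 *
 * where cpu_file is either a CPU number or RING_BUFFER_ALL_CPUS.
 */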
717
718 #ifdef CONFIG_STACKTRACE
719 static void __ftrace_trace_stack(struct ring_buffer *buffer,
720                                  unsigned long flags,
721                                  int skip, int pc, struct pt_regs *regs);
722 static inline void ftrace_trace_stack(struct trace_array *tr,
723                                       struct ring_buffer *buffer,
724                                       unsigned long flags,
725                                       int skip, int pc, struct pt_regs *regs);
726
727 #else
728 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
729                                         unsigned long flags,
730                                         int skip, int pc, struct pt_regs *regs)
731 {
732 }
733 static inline void ftrace_trace_stack(struct trace_array *tr,
734                                       struct ring_buffer *buffer,
735                                       unsigned long flags,
736                                       int skip, int pc, struct pt_regs *regs)
737 {
738 }
739
740 #endif
741
742 static __always_inline void
743 trace_event_setup(struct ring_buffer_event *event,
744                   int type, unsigned long flags, int pc)
745 {
746         struct trace_entry *ent = ring_buffer_event_data(event);
747
748         tracing_generic_entry_update(ent, flags, pc);
749         ent->type = type;
750 }
751
752 static __always_inline struct ring_buffer_event *
753 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
754                           int type,
755                           unsigned long len,
756                           unsigned long flags, int pc)
757 {
758         struct ring_buffer_event *event;
759
760         event = ring_buffer_lock_reserve(buffer, len);
761         if (event != NULL)
762                 trace_event_setup(event, type, flags, pc);
763
764         return event;
765 }
766
767 void tracer_tracing_on(struct trace_array *tr)
768 {
769         if (tr->trace_buffer.buffer)
770                 ring_buffer_record_on(tr->trace_buffer.buffer);
771         /*
772          * This flag is looked at when buffers haven't been allocated
773          * yet, or by some tracers (like irqsoff) that just want to
774          * know if the ring buffer has been disabled, but can handle
775          * races where it gets disabled while a record is still being made.
776          * As the check is in the fast path of the tracers, it is more
777          * important to be fast than accurate.
778          */
779         tr->buffer_disabled = 0;
780         /* Make the flag seen by readers */
781         smp_wmb();
782 }
783
784 /**
785  * tracing_on - enable tracing buffers
786  *
787  * This function enables tracing buffers that may have been
788  * disabled with tracing_off.
789  */
790 void tracing_on(void)
791 {
792         tracer_tracing_on(&global_trace);
793 }
794 EXPORT_SYMBOL_GPL(tracing_on);
795
796
797 static __always_inline void
798 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
799 {
800         __this_cpu_write(trace_taskinfo_save, true);
801
802         /* If this is the temp buffer, we need to commit fully */
803         if (this_cpu_read(trace_buffered_event) == event) {
804                 /* Length is in event->array[0] */
805                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
806                 /* Release the temp buffer */
807                 this_cpu_dec(trace_buffered_event_cnt);
808         } else
809                 ring_buffer_unlock_commit(buffer, event);
810 }
811
812 /**
813  * __trace_puts - write a constant string into the trace buffer.
814  * @ip:    The address of the caller
815  * @str:   The constant string to write
816  * @size:  The size of the string.
817  */
818 int __trace_puts(unsigned long ip, const char *str, int size)
819 {
820         struct ring_buffer_event *event;
821         struct ring_buffer *buffer;
822         struct print_entry *entry;
823         unsigned long irq_flags;
824         int alloc;
825         int pc;
826
827         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
828                 return 0;
829
830         pc = preempt_count();
831
832         if (unlikely(tracing_selftest_running || tracing_disabled))
833                 return 0;
834
835         alloc = sizeof(*entry) + size + 2; /* possible \n added */
836
837         local_save_flags(irq_flags);
838         buffer = global_trace.trace_buffer.buffer;
839         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
840                                             irq_flags, pc);
841         if (!event)
842                 return 0;
843
844         entry = ring_buffer_event_data(event);
845         entry->ip = ip;
846
847         memcpy(&entry->buf, str, size);
848
849         /* Add a newline if necessary */
850         if (entry->buf[size - 1] != '\n') {
851                 entry->buf[size] = '\n';
852                 entry->buf[size + 1] = '\0';
853         } else
854                 entry->buf[size] = '\0';
855
856         __buffer_unlock_commit(buffer, event);
857         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
858
859         return size;
860 }
861 EXPORT_SYMBOL_GPL(__trace_puts);
862
863 /**
864  * __trace_bputs - write the pointer to a constant string into trace buffer
865  * @ip:    The address of the caller
866  * @str:   The constant string to write to the buffer to
867  */
868 int __trace_bputs(unsigned long ip, const char *str)
869 {
870         struct ring_buffer_event *event;
871         struct ring_buffer *buffer;
872         struct bputs_entry *entry;
873         unsigned long irq_flags;
874         int size = sizeof(struct bputs_entry);
875         int pc;
876
877         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
878                 return 0;
879
880         pc = preempt_count();
881
882         if (unlikely(tracing_selftest_running || tracing_disabled))
883                 return 0;
884
885         local_save_flags(irq_flags);
886         buffer = global_trace.trace_buffer.buffer;
887         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
888                                             irq_flags, pc);
889         if (!event)
890                 return 0;
891
892         entry = ring_buffer_event_data(event);
893         entry->ip                       = ip;
894         entry->str                      = str;
895
896         __buffer_unlock_commit(buffer, event);
897         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898
899         return 1;
900 }
901 EXPORT_SYMBOL_GPL(__trace_bputs);
902
903 #ifdef CONFIG_TRACER_SNAPSHOT
904 void tracing_snapshot_instance(struct trace_array *tr)
905 {
906         struct tracer *tracer = tr->current_trace;
907         unsigned long flags;
908
909         if (in_nmi()) {
910                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
911                 internal_trace_puts("*** snapshot is being ignored        ***\n");
912                 return;
913         }
914
915         if (!tr->allocated_snapshot) {
916                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
917                 internal_trace_puts("*** stopping trace here!   ***\n");
918                 tracing_off();
919                 return;
920         }
921
922         /* Note, snapshot cannot be used when the tracer uses it */
923         if (tracer->use_max_tr) {
924                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
925                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
926                 return;
927         }
928
929         local_irq_save(flags);
930         update_max_tr(tr, current, smp_processor_id());
931         local_irq_restore(flags);
932 }
933
934 /**
935  * trace_snapshot - take a snapshot of the current buffer.
936  *
937  * This causes a swap between the snapshot buffer and the current live
938  * tracing buffer. You can use this to take snapshots of the live
939  * trace when some condition is triggered, but continue to trace.
940  *
941  * Note, make sure to allocate the snapshot with either
942  * a tracing_snapshot_alloc(), or by doing it manually
943  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
944  *
945  * If the snapshot buffer is not allocated, it will stop tracing.
946  * Basically making a permanent snapshot.
947  */
948 void tracing_snapshot(void)
949 {
950         struct trace_array *tr = &global_trace;
951
952         tracing_snapshot_instance(tr);
953 }
954 EXPORT_SYMBOL_GPL(tracing_snapshot);
955
956 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
957                                         struct trace_buffer *size_buf, int cpu_id);
958 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
959
960 int tracing_alloc_snapshot_instance(struct trace_array *tr)
961 {
962         int ret;
963
964         if (!tr->allocated_snapshot) {
965
966                 /* allocate spare buffer */
967                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
968                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
969                 if (ret < 0)
970                         return ret;
971
972                 tr->allocated_snapshot = true;
973         }
974
975         return 0;
976 }
977
978 static void free_snapshot(struct trace_array *tr)
979 {
980         /*
981          * We don't free the ring buffer; instead, we resize it because
982          * the max_tr ring buffer has some state (e.g. ring->clock) and
983          * we want to preserve it.
984          */
985         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
986         set_buffer_entries(&tr->max_buffer, 1);
987         tracing_reset_online_cpus(&tr->max_buffer);
988         tr->allocated_snapshot = false;
989 }
990
991 /**
992  * tracing_alloc_snapshot - allocate snapshot buffer.
993  *
994  * This only allocates the snapshot buffer if it isn't already
995  * allocated - it doesn't also take a snapshot.
996  *
997  * This is meant to be used in cases where the snapshot buffer needs
998  * to be set up for events that can't sleep but need to be able to
999  * trigger a snapshot.
1000  */
1001 int tracing_alloc_snapshot(void)
1002 {
1003         struct trace_array *tr = &global_trace;
1004         int ret;
1005
1006         ret = tracing_alloc_snapshot_instance(tr);
1007         WARN_ON(ret < 0);
1008
1009         return ret;
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1012
1013 /**
1014  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1015  *
1016  * This is similar to trace_snapshot(), but it will allocate the
1017  * snapshot buffer if it isn't already allocated. Use this only
1018  * where it is safe to sleep, as the allocation may sleep.
1019  *
1020  * This causes a swap between the snapshot buffer and the current live
1021  * tracing buffer. You can use this to take snapshots of the live
1022  * trace when some condition is triggered, but continue to trace.
1023  */
1024 void tracing_snapshot_alloc(void)
1025 {
1026         int ret;
1027
1028         ret = tracing_alloc_snapshot();
1029         if (ret < 0)
1030                 return;
1031
1032         tracing_snapshot();
1033 }
1034 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1035 #else
1036 void tracing_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_snapshot);
1041 int tracing_alloc_snapshot(void)
1042 {
1043         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1044         return -ENODEV;
1045 }
1046 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1047 void tracing_snapshot_alloc(void)
1048 {
1049         /* Give warning */
1050         tracing_snapshot();
1051 }
1052 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1053 #endif /* CONFIG_TRACER_SNAPSHOT */
1054
1055 void tracer_tracing_off(struct trace_array *tr)
1056 {
1057         if (tr->trace_buffer.buffer)
1058                 ring_buffer_record_off(tr->trace_buffer.buffer);
1059         /*
1060          * This flag is looked at when buffers haven't been allocated
1061          * yet, or by some tracers (like irqsoff) that just want to
1062          * know if the ring buffer has been disabled, but can handle
1063          * races where it gets disabled while a record is still being made.
1064          * As the check is in the fast path of the tracers, it is more
1065          * important to be fast than accurate.
1066          */
1067         tr->buffer_disabled = 1;
1068         /* Make the flag seen by readers */
1069         smp_wmb();
1070 }
1071
1072 /**
1073  * tracing_off - turn off tracing buffers
1074  *
1075  * This function stops the tracing buffers from recording data.
1076  * It does not disable any overhead the tracers themselves may
1077  * be causing. This function simply causes all recording to
1078  * the ring buffers to fail.
1079  */
1080 void tracing_off(void)
1081 {
1082         tracer_tracing_off(&global_trace);
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_off);
1085
1086 void disable_trace_on_warning(void)
1087 {
1088         if (__disable_trace_on_warning)
1089                 tracing_off();
1090 }
1091
1092 /**
1093  * tracer_tracing_is_on - show real state of ring buffer enabled
1094  * @tr: the trace array to know if the ring buffer is enabled
1095  *
1096  * Shows real state of the ring buffer if it is enabled or not.
1097  */
1098 int tracer_tracing_is_on(struct trace_array *tr)
1099 {
1100         if (tr->trace_buffer.buffer)
1101                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1102         return !tr->buffer_disabled;
1103 }
1104
1105 /**
1106  * tracing_is_on - show state of ring buffers enabled
1107  */
1108 int tracing_is_on(void)
1109 {
1110         return tracer_tracing_is_on(&global_trace);
1111 }
1112 EXPORT_SYMBOL_GPL(tracing_is_on);
1113
1114 static int __init set_buf_size(char *str)
1115 {
1116         unsigned long buf_size;
1117
1118         if (!str)
1119                 return 0;
1120         buf_size = memparse(str, &str);
1121         /*
1122          * nr_entries cannot be zero and the startup
1123          * tests require some buffer space. Therefore
1124          * ensure we have at least 4096 bytes of buffer.
1125          */
1126         trace_buf_size = max(4096UL, buf_size);
1127         return 1;
1128 }
1129 __setup("trace_buf_size=", set_buf_size);
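/*
 * Example (derived from set_buf_size() above): memparse() accepts the
 * usual size suffixes, so booting with trace_buf_size=1M asks for one
 * megabyte of ring buffer, and any value below 4096 bytes is raised to
 * 4096 so the startup tests have room to run.
 */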
1130
1131 static int __init set_tracing_thresh(char *str)
1132 {
1133         unsigned long threshold;
1134         int ret;
1135
1136         if (!str)
1137                 return 0;
1138         ret = kstrtoul(str, 0, &threshold);
1139         if (ret < 0)
1140                 return 0;
1141         tracing_thresh = threshold * 1000;
1142         return 1;
1143 }
1144 __setup("tracing_thresh=", set_tracing_thresh);
1145
1146 unsigned long nsecs_to_usecs(unsigned long nsecs)
1147 {
1148         return nsecs / 1000;
1149 }
1150
1151 /*
1152  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1153  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1154  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1155  * of strings in the order that the evals (enum) were defined.
1156  */
1157 #undef C
1158 #define C(a, b) b
1159
1160 /* These must match the bit positions in trace_iterator_flags */
1161 static const char *trace_options[] = {
1162         TRACE_FLAGS
1163         NULL
1164 };
1165
1166 static struct {
1167         u64 (*func)(void);
1168         const char *name;
1169         int in_ns;              /* is this clock in nanoseconds? */
1170 } trace_clocks[] = {
1171         { trace_clock_local,            "local",        1 },
1172         { trace_clock_global,           "global",       1 },
1173         { trace_clock_counter,          "counter",      0 },
1174         { trace_clock_jiffies,          "uptime",       0 },
1175         { trace_clock,                  "perf",         1 },
1176         { ktime_get_mono_fast_ns,       "mono",         1 },
1177         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1178         { ktime_get_boot_fast_ns,       "boot",         1 },
1179         ARCH_TRACE_CLOCKS
1180 };
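/*
 * Illustrative note (a sketch, not from the original source): the names
 * above are what the tracefs "trace_clock" file lists and accepts, and
 * the active clock for a trace_array is picked by index, roughly as:
 *
 *   u64 ts = trace_clocks[tr->clock_id].func();
 *
 * "local" is the default and fastest; "global" is ordered across CPUs.
 */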
1181
1182 /*
1183  * trace_parser_get_init - gets the buffer for trace parser
1184  */
1185 int trace_parser_get_init(struct trace_parser *parser, int size)
1186 {
1187         memset(parser, 0, sizeof(*parser));
1188
1189         parser->buffer = kmalloc(size, GFP_KERNEL);
1190         if (!parser->buffer)
1191                 return 1;
1192
1193         parser->size = size;
1194         return 0;
1195 }
1196
1197 /*
1198  * trace_parser_put - frees the buffer for trace parser
1199  */
1200 void trace_parser_put(struct trace_parser *parser)
1201 {
1202         kfree(parser->buffer);
1203         parser->buffer = NULL;
1204 }
1205
1206 /*
1207  * trace_get_user - reads the user input string separated by space
1208  * (matched by isspace(ch))
1209  *
1210  * For each string found the 'struct trace_parser' is updated,
1211  * and the function returns.
1212  *
1213  * Returns number of bytes read.
1214  *
1215  * See kernel/trace/trace.h for 'struct trace_parser' details.
1216  */
1217 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1218         size_t cnt, loff_t *ppos)
1219 {
1220         char ch;
1221         size_t read = 0;
1222         ssize_t ret;
1223
1224         if (!*ppos)
1225                 trace_parser_clear(parser);
1226
1227         ret = get_user(ch, ubuf++);
1228         if (ret)
1229                 goto out;
1230
1231         read++;
1232         cnt--;
1233
1234         /*
1235          * If the parser is still processing the last write, continue
1236          * reading the user input without skipping spaces.
1237          */
1238         if (!parser->cont) {
1239                 /* skip white space */
1240                 while (cnt && isspace(ch)) {
1241                         ret = get_user(ch, ubuf++);
1242                         if (ret)
1243                                 goto out;
1244                         read++;
1245                         cnt--;
1246                 }
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch)) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254
1255                 parser->idx = 0;
1256         }
1257
1258         /* read the non-space input */
1259         while (cnt && !isspace(ch)) {
1260                 if (parser->idx < parser->size - 1)
1261                         parser->buffer[parser->idx++] = ch;
1262                 else {
1263                         ret = -EINVAL;
1264                         goto out;
1265                 }
1266                 ret = get_user(ch, ubuf++);
1267                 if (ret)
1268                         goto out;
1269                 read++;
1270                 cnt--;
1271         }
1272
1273         /* We either got finished input or we have to wait for another call. */
1274         if (isspace(ch)) {
1275                 parser->buffer[parser->idx] = 0;
1276                 parser->cont = false;
1277         } else if (parser->idx < parser->size - 1) {
1278                 parser->cont = true;
1279                 parser->buffer[parser->idx++] = ch;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this task's comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         struct ring_buffer *buf;
1364
1365         if (tr->stop_count)
1366                 return;
1367
1368         WARN_ON_ONCE(!irqs_disabled());
1369
1370         if (!tr->allocated_snapshot) {
1371                 /* Only the nop tracer should hit this when disabling */
1372                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373                 return;
1374         }
1375
1376         arch_spin_lock(&tr->max_lock);
1377
1378         /* Inherit the recordable setting from trace_buffer */
1379         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1380                 ring_buffer_record_on(tr->max_buffer.buffer);
1381         else
1382                 ring_buffer_record_off(tr->max_buffer.buffer);
1383
1384         buf = tr->trace_buffer.buffer;
1385         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1386         tr->max_buffer.buffer = buf;
1387
1388         __update_max_tr(tr, tsk, cpu);
1389         arch_spin_unlock(&tr->max_lock);
1390 }
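/*
 * Illustrative call pattern (a sketch; "delta" is a hypothetical latency
 * measurement): a latency tracer that has just measured a new maximum,
 * with interrupts already disabled, typically does something like:
 *
 *   if (delta > tr->max_latency) {
 *           tr->max_latency = delta;
 *           update_max_tr(tr, current, smp_processor_id());
 *   }
 */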
1391
1392 /**
1393  * update_max_tr_single - only copy one trace over, and reset the rest
1394  * @tr: tracer
1395  * @tsk: task with the latency
1396  * @cpu: the cpu of the buffer to copy.
1397  *
1398  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1399  */
1400 void
1401 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1402 {
1403         int ret;
1404
1405         if (tr->stop_count)
1406                 return;
1407
1408         WARN_ON_ONCE(!irqs_disabled());
1409         if (!tr->allocated_snapshot) {
1410                 /* Only the nop tracer should hit this when disabling */
1411                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1412                 return;
1413         }
1414
1415         arch_spin_lock(&tr->max_lock);
1416
1417         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1418
1419         if (ret == -EBUSY) {
1420                 /*
1421                  * We failed to swap the buffer due to a commit taking
1422                  * place on this CPU. We fail to record, but we reset
1423                  * the max trace buffer (no one writes directly to it)
1424                  * and flag that it failed.
1425                  */
1426                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1427                         "Failed to swap buffers due to commit in progress\n");
1428         }
1429
1430         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1431
1432         __update_max_tr(tr, tsk, cpu);
1433         arch_spin_unlock(&tr->max_lock);
1434 }
1435 #endif /* CONFIG_TRACER_MAX_TRACE */
1436
1437 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1438 {
1439         /* Iterators are static, they should be filled or empty */
1440         if (trace_buffer_iter(iter, iter->cpu_file))
1441                 return 0;
1442
1443         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1444                                 full);
1445 }
1446
1447 #ifdef CONFIG_FTRACE_STARTUP_TEST
1448 static bool selftests_can_run;
1449
1450 struct trace_selftests {
1451         struct list_head                list;
1452         struct tracer                   *type;
1453 };
1454
1455 static LIST_HEAD(postponed_selftests);
1456
1457 static int save_selftest(struct tracer *type)
1458 {
1459         struct trace_selftests *selftest;
1460
1461         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1462         if (!selftest)
1463                 return -ENOMEM;
1464
1465         selftest->type = type;
1466         list_add(&selftest->list, &postponed_selftests);
1467         return 0;
1468 }
1469
1470 static int run_tracer_selftest(struct tracer *type)
1471 {
1472         struct trace_array *tr = &global_trace;
1473         struct tracer *saved_tracer = tr->current_trace;
1474         int ret;
1475
1476         if (!type->selftest || tracing_selftest_disabled)
1477                 return 0;
1478
1479         /*
1480          * If a tracer registers early in boot up (before scheduling is
1481          * initialized and such), then do not run its selftests yet.
1482          * Instead, run it a little later in the boot process.
1483          */
1484         if (!selftests_can_run)
1485                 return save_selftest(type);
1486
1487         /*
1488          * Run a selftest on this tracer.
1489          * Here we reset the trace buffer, and set the current
1490          * tracer to be this tracer. The tracer can then run some
1491          * internal tracing to verify that everything is in order.
1492          * If we fail, we do not register this tracer.
1493          */
1494         tracing_reset_online_cpus(&tr->trace_buffer);
1495
1496         tr->current_trace = type;
1497
1498 #ifdef CONFIG_TRACER_MAX_TRACE
1499         if (type->use_max_tr) {
1500                 /* If we expanded the buffers, make sure the max is expanded too */
1501                 if (ring_buffer_expanded)
1502                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1503                                            RING_BUFFER_ALL_CPUS);
1504                 tr->allocated_snapshot = true;
1505         }
1506 #endif
1507
1508         /* the test is responsible for initializing and enabling */
1509         pr_info("Testing tracer %s: ", type->name);
1510         ret = type->selftest(type, tr);
1511         /* the test is responsible for resetting too */
1512         tr->current_trace = saved_tracer;
1513         if (ret) {
1514                 printk(KERN_CONT "FAILED!\n");
1515                 /* Add the warning after printing 'FAILED' */
1516                 WARN_ON(1);
1517                 return -1;
1518         }
1519         /* Only reset on passing, to avoid touching corrupted buffers */
1520         tracing_reset_online_cpus(&tr->trace_buffer);
1521
1522 #ifdef CONFIG_TRACER_MAX_TRACE
1523         if (type->use_max_tr) {
1524                 tr->allocated_snapshot = false;
1525
1526                 /* Shrink the max buffer again */
1527                 if (ring_buffer_expanded)
1528                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1529                                            RING_BUFFER_ALL_CPUS);
1530         }
1531 #endif
1532
1533         printk(KERN_CONT "PASSED\n");
1534         return 0;
1535 }
1536
1537 static __init int init_trace_selftests(void)
1538 {
1539         struct trace_selftests *p, *n;
1540         struct tracer *t, **last;
1541         int ret;
1542
1543         selftests_can_run = true;
1544
1545         mutex_lock(&trace_types_lock);
1546
1547         if (list_empty(&postponed_selftests))
1548                 goto out;
1549
1550         pr_info("Running postponed tracer tests:\n");
1551
1552         tracing_selftest_running = true;
1553         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1554                 ret = run_tracer_selftest(p->type);
1555                 /* If the test fails, then warn and remove from available_tracers */
1556                 if (ret < 0) {
1557                         WARN(1, "tracer: %s failed selftest, disabling\n",
1558                              p->type->name);
1559                         last = &trace_types;
1560                         for (t = trace_types; t; t = t->next) {
1561                                 if (t == p->type) {
1562                                         *last = t->next;
1563                                         break;
1564                                 }
1565                                 last = &t->next;
1566                         }
1567                 }
1568                 list_del(&p->list);
1569                 kfree(p);
1570         }
1571         tracing_selftest_running = false;
1572
1573  out:
1574         mutex_unlock(&trace_types_lock);
1575
1576         return 0;
1577 }
1578 core_initcall(init_trace_selftests);
1579 #else
1580 static inline int run_tracer_selftest(struct tracer *type)
1581 {
1582         return 0;
1583 }
1584 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1585
1586 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1587
1588 static void __init apply_trace_boot_options(void);
1589
1590 /**
1591  * register_tracer - register a tracer with the ftrace system.
1592  * @type: the plugin for the tracer
1593  *
1594  * Register a new plugin tracer.
1595  */
1596 int __init register_tracer(struct tracer *type)
1597 {
1598         struct tracer *t;
1599         int ret = 0;
1600
1601         if (!type->name) {
1602                 pr_info("Tracer must have a name\n");
1603                 return -1;
1604         }
1605
1606         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1607                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1608                 return -1;
1609         }
1610
1611         mutex_lock(&trace_types_lock);
1612
1613         tracing_selftest_running = true;
1614
1615         for (t = trace_types; t; t = t->next) {
1616                 if (strcmp(type->name, t->name) == 0) {
1617                         /* already found */
1618                         pr_info("Tracer %s already registered\n",
1619                                 type->name);
1620                         ret = -1;
1621                         goto out;
1622                 }
1623         }
1624
1625         if (!type->set_flag)
1626                 type->set_flag = &dummy_set_flag;
1627         if (!type->flags) {
1628                 /* allocate a dummy tracer_flags */
1629                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1630                 if (!type->flags) {
1631                         ret = -ENOMEM;
1632                         goto out;
1633                 }
1634                 type->flags->val = 0;
1635                 type->flags->opts = dummy_tracer_opt;
1636         } else
1637                 if (!type->flags->opts)
1638                         type->flags->opts = dummy_tracer_opt;
1639
1640         /* store the tracer for __set_tracer_option */
1641         type->flags->trace = type;
1642
1643         ret = run_tracer_selftest(type);
1644         if (ret < 0)
1645                 goto out;
1646
1647         type->next = trace_types;
1648         trace_types = type;
1649         add_tracer_options(&global_trace, type);
1650
1651  out:
1652         tracing_selftest_running = false;
1653         mutex_unlock(&trace_types_lock);
1654
1655         if (ret || !default_bootup_tracer)
1656                 goto out_unlock;
1657
1658         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1659                 goto out_unlock;
1660
1661         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1662         /* Do we want this tracer to start on bootup? */
1663         tracing_set_tracer(&global_trace, type->name);
1664         default_bootup_tracer = NULL;
1665
1666         apply_trace_boot_options();
1667
1668         /* Disable other selftests, since this will break them. */
1669         tracing_selftest_disabled = true;
1670 #ifdef CONFIG_FTRACE_STARTUP_TEST
1671         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1672                type->name);
1673 #endif
1674
1675  out_unlock:
1676         return ret;
1677 }
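
/*
 * Illustrative sketch (not part of this file): a minimal tracer plugin
 * fills in a struct tracer and hands it to register_tracer() from an
 * __init function. The names "nop_like", my_tracer_init and
 * my_tracer_reset below are hypothetical.
 *
 *	static int my_tracer_init(struct trace_array *tr) { return 0; }
 *	static void my_tracer_reset(struct trace_array *tr) { }
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "nop_like",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */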
1678
1679 void tracing_reset(struct trace_buffer *buf, int cpu)
1680 {
1681         struct ring_buffer *buffer = buf->buffer;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690         ring_buffer_reset_cpu(buffer, cpu);
1691
1692         ring_buffer_record_enable(buffer);
1693 }
1694
1695 void tracing_reset_online_cpus(struct trace_buffer *buf)
1696 {
1697         struct ring_buffer *buffer = buf->buffer;
1698         int cpu;
1699
1700         if (!buffer)
1701                 return;
1702
1703         ring_buffer_record_disable(buffer);
1704
1705         /* Make sure all commits have finished */
1706         synchronize_sched();
1707
1708         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1709
1710         for_each_online_cpu(cpu)
1711                 ring_buffer_reset_cpu(buffer, cpu);
1712
1713         ring_buffer_record_enable(buffer);
1714 }
1715
1716 /* Must have trace_types_lock held */
1717 void tracing_reset_all_online_cpus(void)
1718 {
1719         struct trace_array *tr;
1720
1721         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1722                 if (!tr->clear_trace)
1723                         continue;
1724                 tr->clear_trace = false;
1725                 tracing_reset_online_cpus(&tr->trace_buffer);
1726 #ifdef CONFIG_TRACER_MAX_TRACE
1727                 tracing_reset_online_cpus(&tr->max_buffer);
1728 #endif
1729         }
1730 }
1731
1732 /*
1733  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1734  * is the tgid last observed corresponding to pid=i.
1735  */
1736 static int *tgid_map;
1737
1738 /* The maximum valid index into tgid_map. */
1739 static size_t tgid_map_max;
1740
1741 #define SAVED_CMDLINES_DEFAULT 128
1742 #define NO_CMDLINE_MAP UINT_MAX
1743 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1744 struct saved_cmdlines_buffer {
1745         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1746         unsigned *map_cmdline_to_pid;
1747         unsigned cmdline_num;
1748         int cmdline_idx;
1749         char *saved_cmdlines;
1750 };
1751 static struct saved_cmdlines_buffer *savedcmd;
1752
1753 static inline char *get_saved_cmdlines(int idx)
1754 {
1755         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1756 }
1757
1758 static inline void set_cmdline(int idx, const char *cmdline)
1759 {
1760         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1761 }
1762
1763 static int allocate_cmdlines_buffer(unsigned int val,
1764                                     struct saved_cmdlines_buffer *s)
1765 {
1766         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1767                                         GFP_KERNEL);
1768         if (!s->map_cmdline_to_pid)
1769                 return -ENOMEM;
1770
1771         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1772         if (!s->saved_cmdlines) {
1773                 kfree(s->map_cmdline_to_pid);
1774                 return -ENOMEM;
1775         }
1776
1777         s->cmdline_idx = 0;
1778         s->cmdline_num = val;
1779         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1780                sizeof(s->map_pid_to_cmdline));
1781         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1782                val * sizeof(*s->map_cmdline_to_pid));
1783
1784         return 0;
1785 }
1786
1787 static int trace_create_savedcmd(void)
1788 {
1789         int ret;
1790
1791         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1792         if (!savedcmd)
1793                 return -ENOMEM;
1794
1795         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1796         if (ret < 0) {
1797                 kfree(savedcmd);
1798                 savedcmd = NULL;
1799                 return -ENOMEM;
1800         }
1801
1802         return 0;
1803 }
1804
1805 int is_tracing_stopped(void)
1806 {
1807         return global_trace.stop_count;
1808 }
1809
1810 /**
1811  * tracing_start - quick start of the tracer
1812  *
1813  * If tracing is enabled but was stopped by tracing_stop,
1814  * this will start the tracer back up.
1815  */
1816 void tracing_start(void)
1817 {
1818         struct ring_buffer *buffer;
1819         unsigned long flags;
1820
1821         if (tracing_disabled)
1822                 return;
1823
1824         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1825         if (--global_trace.stop_count) {
1826                 if (global_trace.stop_count < 0) {
1827                         /* Someone screwed up their debugging */
1828                         WARN_ON_ONCE(1);
1829                         global_trace.stop_count = 0;
1830                 }
1831                 goto out;
1832         }
1833
1834         /* Prevent the buffers from switching */
1835         arch_spin_lock(&global_trace.max_lock);
1836
1837         buffer = global_trace.trace_buffer.buffer;
1838         if (buffer)
1839                 ring_buffer_record_enable(buffer);
1840
1841 #ifdef CONFIG_TRACER_MAX_TRACE
1842         buffer = global_trace.max_buffer.buffer;
1843         if (buffer)
1844                 ring_buffer_record_enable(buffer);
1845 #endif
1846
1847         arch_spin_unlock(&global_trace.max_lock);
1848
1849  out:
1850         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1851 }
1852
1853 static void tracing_start_tr(struct trace_array *tr)
1854 {
1855         struct ring_buffer *buffer;
1856         unsigned long flags;
1857
1858         if (tracing_disabled)
1859                 return;
1860
1861         /* If global, we need to also start the max tracer */
1862         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1863                 return tracing_start();
1864
1865         raw_spin_lock_irqsave(&tr->start_lock, flags);
1866
1867         if (--tr->stop_count) {
1868                 if (tr->stop_count < 0) {
1869                         /* Someone screwed up their debugging */
1870                         WARN_ON_ONCE(1);
1871                         tr->stop_count = 0;
1872                 }
1873                 goto out;
1874         }
1875
1876         buffer = tr->trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_enable(buffer);
1879
1880  out:
1881         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1882 }
1883
1884 /**
1885  * tracing_stop - quick stop of the tracer
1886  *
1887  * Lightweight way to stop tracing. Use in conjunction with
1888  * tracing_start.
1889  */
1890 void tracing_stop(void)
1891 {
1892         struct ring_buffer *buffer;
1893         unsigned long flags;
1894
1895         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1896         if (global_trace.stop_count++)
1897                 goto out;
1898
1899         /* Prevent the buffers from switching */
1900         arch_spin_lock(&global_trace.max_lock);
1901
1902         buffer = global_trace.trace_buffer.buffer;
1903         if (buffer)
1904                 ring_buffer_record_disable(buffer);
1905
1906 #ifdef CONFIG_TRACER_MAX_TRACE
1907         buffer = global_trace.max_buffer.buffer;
1908         if (buffer)
1909                 ring_buffer_record_disable(buffer);
1910 #endif
1911
1912         arch_spin_unlock(&global_trace.max_lock);
1913
1914  out:
1915         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1916 }
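
/*
 * Illustrative sketch (not part of this file): tracing_stop() and
 * tracing_start() nest via stop_count, so a debugging helper can keep
 * its own noise out of the trace by bracketing it:
 *
 *	tracing_stop();
 *	do_noisy_debug_work();		// hypothetical helper
 *	tracing_start();
 */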
1917
1918 static void tracing_stop_tr(struct trace_array *tr)
1919 {
1920         struct ring_buffer *buffer;
1921         unsigned long flags;
1922
1923         /* If global, we need to also stop the max tracer */
1924         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1925                 return tracing_stop();
1926
1927         raw_spin_lock_irqsave(&tr->start_lock, flags);
1928         if (tr->stop_count++)
1929                 goto out;
1930
1931         buffer = tr->trace_buffer.buffer;
1932         if (buffer)
1933                 ring_buffer_record_disable(buffer);
1934
1935  out:
1936         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1937 }
1938
1939 static int trace_save_cmdline(struct task_struct *tsk)
1940 {
1941         unsigned tpid, idx;
1942
1943         /* treat recording of idle task as a success */
1944         if (!tsk->pid)
1945                 return 1;
1946
1947         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1948
1949         /*
1950          * It's not the end of the world if we don't get
1951          * the lock, but we also don't want to spin
1952          * nor do we want to disable interrupts,
1953          * so if we miss here, then better luck next time.
1954          */
1955         if (!arch_spin_trylock(&trace_cmdline_lock))
1956                 return 0;
1957
1958         idx = savedcmd->map_pid_to_cmdline[tpid];
1959         if (idx == NO_CMDLINE_MAP) {
1960                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1961
1962                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1963                 savedcmd->cmdline_idx = idx;
1964         }
1965
1966         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1967         set_cmdline(idx, tsk->comm);
1968
1969         arch_spin_unlock(&trace_cmdline_lock);
1970
1971         return 1;
1972 }
1973
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976         unsigned map;
1977         int tpid;
1978
1979         if (!pid) {
1980                 strcpy(comm, "<idle>");
1981                 return;
1982         }
1983
1984         if (WARN_ON_ONCE(pid < 0)) {
1985                 strcpy(comm, "<XXX>");
1986                 return;
1987         }
1988
1989         tpid = pid & (PID_MAX_DEFAULT - 1);
1990         map = savedcmd->map_pid_to_cmdline[tpid];
1991         if (map != NO_CMDLINE_MAP) {
1992                 tpid = savedcmd->map_cmdline_to_pid[map];
1993                 if (tpid == pid) {
1994                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1995                         return;
1996                 }
1997         }
1998         strcpy(comm, "<...>");
1999 }
2000
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003         preempt_disable();
2004         arch_spin_lock(&trace_cmdline_lock);
2005
2006         __trace_find_cmdline(pid, comm);
2007
2008         arch_spin_unlock(&trace_cmdline_lock);
2009         preempt_enable();
2010 }
2011
2012 static int *trace_find_tgid_ptr(int pid)
2013 {
2014         /*
2015          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2016          * if we observe a non-NULL tgid_map then we also observe the correct
2017          * tgid_map_max.
2018          */
2019         int *map = smp_load_acquire(&tgid_map);
2020
2021         if (unlikely(!map || pid > tgid_map_max))
2022                 return NULL;
2023
2024         return &map[pid];
2025 }
2026
2027 int trace_find_tgid(int pid)
2028 {
2029         int *ptr = trace_find_tgid_ptr(pid);
2030
2031         return ptr ? *ptr : 0;
2032 }
2033
2034 static int trace_save_tgid(struct task_struct *tsk)
2035 {
2036         int *ptr;
2037
2038         /* treat recording of idle task as a success */
2039         if (!tsk->pid)
2040                 return 1;
2041
2042         ptr = trace_find_tgid_ptr(tsk->pid);
2043         if (!ptr)
2044                 return 0;
2045
2046         *ptr = tsk->tgid;
2047         return 1;
2048 }
2049
2050 static bool tracing_record_taskinfo_skip(int flags)
2051 {
2052         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2053                 return true;
2054         if (!__this_cpu_read(trace_taskinfo_save))
2055                 return true;
2056         return false;
2057 }
2058
2059 /**
2060  * tracing_record_taskinfo - record the task info of a task
2061  *
2062  * @task:  task to record
2063  * @flags: TRACE_RECORD_CMDLINE for recording comm
2064  *         TRACE_RECORD_TGID for recording tgid
2065  */
2066 void tracing_record_taskinfo(struct task_struct *task, int flags)
2067 {
2068         bool done;
2069
2070         if (tracing_record_taskinfo_skip(flags))
2071                 return;
2072
2073         /*
2074          * Record as much task information as possible. If some fail, continue
2075          * to try to record the others.
2076          */
2077         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2078         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2079
2080         /* If recording any information failed, retry again soon. */
2081         if (!done)
2082                 return;
2083
2084         __this_cpu_write(trace_taskinfo_save, false);
2085 }
2086
2087 /**
2088  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2089  *
2090  * @prev:  previous task during sched_switch
2091  * @next:  next task during sched_switch
2092  * @flags: TRACE_RECORD_CMDLINE for recording comm
2093  *         TRACE_RECORD_TGID for recording tgid
2094  */
2095 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2096                                           struct task_struct *next, int flags)
2097 {
2098         bool done;
2099
2100         if (tracing_record_taskinfo_skip(flags))
2101                 return;
2102
2103         /*
2104          * Record as much task information as possible. If some fail, continue
2105          * to try to record the others.
2106          */
2107         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2108         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2109         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2110         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2111
2112         /* If recording any information failed, retry again soon. */
2113         if (!done)
2114                 return;
2115
2116         __this_cpu_write(trace_taskinfo_save, false);
2117 }
2118
2119 /* Helpers to record a specific task information */
2120 void tracing_record_cmdline(struct task_struct *task)
2121 {
2122         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2123 }
2124
2125 void tracing_record_tgid(struct task_struct *task)
2126 {
2127         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2128 }
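
/*
 * Illustrative sketch (not part of this file): a caller that wants both
 * pieces of task info recorded at once can OR the flags together:
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 *
 * which mirrors what tracing_record_taskinfo_sched_switch() above does
 * for both tasks on a context switch.
 */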
2129
2130 /*
2131  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2132  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2133  * simplifies those functions and keeps them in sync.
2134  */
2135 enum print_line_t trace_handle_return(struct trace_seq *s)
2136 {
2137         return trace_seq_has_overflowed(s) ?
2138                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2139 }
2140 EXPORT_SYMBOL_GPL(trace_handle_return);
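
/*
 * Illustrative sketch (not part of this file): an event's output
 * callback typically ends with trace_handle_return(), so a trace_seq
 * overflow is reported as TRACE_TYPE_PARTIAL_LINE. The callback name
 * my_event_print is hypothetical.
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */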
2141
2142 void
2143 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2144                              int pc)
2145 {
2146         struct task_struct *tsk = current;
2147
2148         entry->preempt_count            = pc & 0xff;
2149         entry->pid                      = (tsk) ? tsk->pid : 0;
2150         entry->flags =
2151 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2152                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2153 #else
2154                 TRACE_FLAG_IRQS_NOSUPPORT |
2155 #endif
2156                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2157                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2158                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2159                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2160                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2161 }
2162 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2163
2164 struct ring_buffer_event *
2165 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2166                           int type,
2167                           unsigned long len,
2168                           unsigned long flags, int pc)
2169 {
2170         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2171 }
2172
2173 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2174 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2175 static int trace_buffered_event_ref;
2176
2177 /**
2178  * trace_buffered_event_enable - enable buffering events
2179  *
2180  * When events are being filtered, it is quicker to use a temporary
2181  * buffer to write the event data into if there's a likely chance
2182  * that it will not be committed. Discarding an event from the ring
2183  * buffer is not as fast as committing, and is much slower than
2184  * copying a commit.
2185  *
2186  * When an event is to be filtered, allocate per cpu buffers to
2187  * write the event data into. If the event is then filtered and
2188  * discarded, it is simply dropped; otherwise the entire data is
2189  * committed in one shot.
2190  */
2191 void trace_buffered_event_enable(void)
2192 {
2193         struct ring_buffer_event *event;
2194         struct page *page;
2195         int cpu;
2196
2197         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2198
2199         if (trace_buffered_event_ref++)
2200                 return;
2201
2202         for_each_tracing_cpu(cpu) {
2203                 page = alloc_pages_node(cpu_to_node(cpu),
2204                                         GFP_KERNEL | __GFP_NORETRY, 0);
2205                 if (!page)
2206                         goto failed;
2207
2208                 event = page_address(page);
2209                 memset(event, 0, sizeof(*event));
2210
2211                 per_cpu(trace_buffered_event, cpu) = event;
2212
2213                 preempt_disable();
2214                 if (cpu == smp_processor_id() &&
2215                     this_cpu_read(trace_buffered_event) !=
2216                     per_cpu(trace_buffered_event, cpu))
2217                         WARN_ON_ONCE(1);
2218                 preempt_enable();
2219         }
2220
2221         return;
2222  failed:
2223         trace_buffered_event_disable();
2224 }
2225
2226 static void enable_trace_buffered_event(void *data)
2227 {
2228         /* Probably not needed, but do it anyway */
2229         smp_rmb();
2230         this_cpu_dec(trace_buffered_event_cnt);
2231 }
2232
2233 static void disable_trace_buffered_event(void *data)
2234 {
2235         this_cpu_inc(trace_buffered_event_cnt);
2236 }
2237
2238 /**
2239  * trace_buffered_event_disable - disable buffering events
2240  *
2241  * When a filter is removed, it is faster to not use the buffered
2242  * events, and to commit directly into the ring buffer. Free up
2243  * the temp buffers when there are no more users. This requires
2244  * special synchronization with current events.
2245  */
2246 void trace_buffered_event_disable(void)
2247 {
2248         int cpu;
2249
2250         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2251
2252         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2253                 return;
2254
2255         if (--trace_buffered_event_ref)
2256                 return;
2257
2258         preempt_disable();
2259         /* For each CPU, set the buffer as used. */
2260         smp_call_function_many(tracing_buffer_mask,
2261                                disable_trace_buffered_event, NULL, 1);
2262         preempt_enable();
2263
2264         /* Wait for all current users to finish */
2265         synchronize_sched();
2266
2267         for_each_tracing_cpu(cpu) {
2268                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2269                 per_cpu(trace_buffered_event, cpu) = NULL;
2270         }
2271         /*
2272          * Make sure trace_buffered_event is NULL before clearing
2273          * trace_buffered_event_cnt.
2274          */
2275         smp_wmb();
2276
2277         preempt_disable();
2278         /* Undo the earlier increment on each cpu, now that the buffers are gone */
2279         smp_call_function_many(tracing_buffer_mask,
2280                                enable_trace_buffered_event, NULL, 1);
2281         preempt_enable();
2282 }
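
/*
 * Illustrative sketch (not part of this file): enable/disable are
 * refcounted and both expect event_mutex to be held, so a filter
 * setup path brackets its work roughly like this:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and calls trace_buffered_event_disable() (again under event_mutex)
 * once the filter is removed.
 */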
2283
2284 static struct ring_buffer *temp_buffer;
2285
2286 struct ring_buffer_event *
2287 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2288                           struct trace_event_file *trace_file,
2289                           int type, unsigned long len,
2290                           unsigned long flags, int pc)
2291 {
2292         struct ring_buffer_event *entry;
2293         int val;
2294
2295         *current_rb = trace_file->tr->trace_buffer.buffer;
2296
2297         if ((trace_file->flags &
2298              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2299             (entry = this_cpu_read(trace_buffered_event))) {
2300                 /* Try to use the per cpu buffer first */
2301                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2302                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2303                         trace_event_setup(entry, type, flags, pc);
2304                         entry->array[0] = len;
2305                         return entry;
2306                 }
2307                 this_cpu_dec(trace_buffered_event_cnt);
2308         }
2309
2310         entry = __trace_buffer_lock_reserve(*current_rb,
2311                                             type, len, flags, pc);
2312         /*
2313          * If tracing is off, but we have triggers enabled
2314          * we still need to look at the event data. Use the temp_buffer
2315          * to store the trace event for the trigger to use. It's recursion
2316          * safe and will not be recorded anywhere.
2317          */
2318         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2319                 *current_rb = temp_buffer;
2320                 entry = __trace_buffer_lock_reserve(*current_rb,
2321                                                     type, len, flags, pc);
2322         }
2323         return entry;
2324 }
2325 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2326
2327 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2328 static DEFINE_MUTEX(tracepoint_printk_mutex);
2329
2330 static void output_printk(struct trace_event_buffer *fbuffer)
2331 {
2332         struct trace_event_call *event_call;
2333         struct trace_event *event;
2334         unsigned long flags;
2335         struct trace_iterator *iter = tracepoint_print_iter;
2336
2337         /* We should never get here if iter is NULL */
2338         if (WARN_ON_ONCE(!iter))
2339                 return;
2340
2341         event_call = fbuffer->trace_file->event_call;
2342         if (!event_call || !event_call->event.funcs ||
2343             !event_call->event.funcs->trace)
2344                 return;
2345
2346         event = &fbuffer->trace_file->event_call->event;
2347
2348         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2349         trace_seq_init(&iter->seq);
2350         iter->ent = fbuffer->entry;
2351         event_call->event.funcs->trace(iter, 0, event);
2352         trace_seq_putc(&iter->seq, 0);
2353         printk("%s", iter->seq.buffer);
2354
2355         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2356 }
2357
2358 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2359                              void __user *buffer, size_t *lenp,
2360                              loff_t *ppos)
2361 {
2362         int save_tracepoint_printk;
2363         int ret;
2364
2365         mutex_lock(&tracepoint_printk_mutex);
2366         save_tracepoint_printk = tracepoint_printk;
2367
2368         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2369
2370         /*
2371          * This will force exiting early, as tracepoint_printk
2372          * is always zero when tracepoint_print_iter is not allocated.
2373          */
2374         if (!tracepoint_print_iter)
2375                 tracepoint_printk = 0;
2376
2377         if (save_tracepoint_printk == tracepoint_printk)
2378                 goto out;
2379
2380         if (tracepoint_printk)
2381                 static_key_enable(&tracepoint_printk_key.key);
2382         else
2383                 static_key_disable(&tracepoint_printk_key.key);
2384
2385  out:
2386         mutex_unlock(&tracepoint_printk_mutex);
2387
2388         return ret;
2389 }
2390
2391 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2392 {
2393         if (static_key_false(&tracepoint_printk_key.key))
2394                 output_printk(fbuffer);
2395
2396         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2397                                     fbuffer->event, fbuffer->entry,
2398                                     fbuffer->flags, fbuffer->pc);
2399 }
2400 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2401
2402 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2403                                      struct ring_buffer *buffer,
2404                                      struct ring_buffer_event *event,
2405                                      unsigned long flags, int pc,
2406                                      struct pt_regs *regs)
2407 {
2408         __buffer_unlock_commit(buffer, event);
2409
2410         /*
2411          * If regs is not set, then skip the following callers:
2412          *   trace_buffer_unlock_commit_regs
2413          *   event_trigger_unlock_commit
2414          *   trace_event_buffer_commit
2415          *   trace_event_raw_event_sched_switch
2416          * Note, we can still get here via blktrace, wakeup tracer
2417          * and mmiotrace, but that's ok if they lose a function or
2418          * two. They are not that meaningful.
2419          */
2420         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2421         ftrace_trace_userstack(tr, buffer, flags, pc);
2422 }
2423
2424 /*
2425  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2426  */
2427 void
2428 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2429                                    struct ring_buffer_event *event)
2430 {
2431         __buffer_unlock_commit(buffer, event);
2432 }
2433
2434 static void
2435 trace_process_export(struct trace_export *export,
2436                struct ring_buffer_event *event)
2437 {
2438         struct trace_entry *entry;
2439         unsigned int size = 0;
2440
2441         entry = ring_buffer_event_data(event);
2442         size = ring_buffer_event_length(event);
2443         export->write(entry, size);
2444 }
2445
2446 static DEFINE_MUTEX(ftrace_export_lock);
2447
2448 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2449
2450 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2451
2452 static inline void ftrace_exports_enable(void)
2453 {
2454         static_branch_enable(&ftrace_exports_enabled);
2455 }
2456
2457 static inline void ftrace_exports_disable(void)
2458 {
2459         static_branch_disable(&ftrace_exports_enabled);
2460 }
2461
2462 void ftrace_exports(struct ring_buffer_event *event)
2463 {
2464         struct trace_export *export;
2465
2466         preempt_disable_notrace();
2467
2468         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2469         while (export) {
2470                 trace_process_export(export, event);
2471                 export = rcu_dereference_raw_notrace(export->next);
2472         }
2473
2474         preempt_enable_notrace();
2475 }
2476
2477 static inline void
2478 add_trace_export(struct trace_export **list, struct trace_export *export)
2479 {
2480         rcu_assign_pointer(export->next, *list);
2481         /*
2482          * We are entering export into the list but another
2483          * CPU might be walking that list. We need to make sure
2484          * the export->next pointer is valid before another CPU sees
2485          * the export pointer included into the list.
2486          */
2487         rcu_assign_pointer(*list, export);
2488 }
2489
2490 static inline int
2491 rm_trace_export(struct trace_export **list, struct trace_export *export)
2492 {
2493         struct trace_export **p;
2494
2495         for (p = list; *p != NULL; p = &(*p)->next)
2496                 if (*p == export)
2497                         break;
2498
2499         if (*p != export)
2500                 return -1;
2501
2502         rcu_assign_pointer(*p, (*p)->next);
2503
2504         return 0;
2505 }
2506
2507 static inline void
2508 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2509 {
2510         if (*list == NULL)
2511                 ftrace_exports_enable();
2512
2513         add_trace_export(list, export);
2514 }
2515
2516 static inline int
2517 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2518 {
2519         int ret;
2520
2521         ret = rm_trace_export(list, export);
2522         if (*list == NULL)
2523                 ftrace_exports_disable();
2524
2525         return ret;
2526 }
2527
2528 int register_ftrace_export(struct trace_export *export)
2529 {
2530         if (WARN_ON_ONCE(!export->write))
2531                 return -1;
2532
2533         mutex_lock(&ftrace_export_lock);
2534
2535         add_ftrace_export(&ftrace_exports_list, export);
2536
2537         mutex_unlock(&ftrace_export_lock);
2538
2539         return 0;
2540 }
2541 EXPORT_SYMBOL_GPL(register_ftrace_export);
2542
2543 int unregister_ftrace_export(struct trace_export *export)
2544 {
2545         int ret;
2546
2547         mutex_lock(&ftrace_export_lock);
2548
2549         ret = rm_ftrace_export(&ftrace_exports_list, export);
2550
2551         mutex_unlock(&ftrace_export_lock);
2552
2553         return ret;
2554 }
2555 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
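
/*
 * Illustrative sketch (not part of this file): an exporter provides a
 * write() callback in a struct trace_export and registers it; every
 * function trace event is then handed to that callback (see
 * trace_process_export() above for the two-argument call). The names
 * my_export_write/my_export are hypothetical; check struct trace_export
 * in <linux/trace.h> for the exact prototype.
 *
 *	static void my_export_write(const void *buf, unsigned int len)
 *	{
 *		// push the raw trace entry somewhere out of band
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	// register_ftrace_export(&my_export);
 *	// ...
 *	// unregister_ftrace_export(&my_export);
 */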
2556
2557 void
2558 trace_function(struct trace_array *tr,
2559                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2560                int pc)
2561 {
2562         struct trace_event_call *call = &event_function;
2563         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2564         struct ring_buffer_event *event;
2565         struct ftrace_entry *entry;
2566
2567         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2568                                             flags, pc);
2569         if (!event)
2570                 return;
2571         entry   = ring_buffer_event_data(event);
2572         entry->ip                       = ip;
2573         entry->parent_ip                = parent_ip;
2574
2575         if (!call_filter_check_discard(call, entry, buffer, event)) {
2576                 if (static_branch_unlikely(&ftrace_exports_enabled))
2577                         ftrace_exports(event);
2578                 __buffer_unlock_commit(buffer, event);
2579         }
2580 }
2581
2582 #ifdef CONFIG_STACKTRACE
2583
2584 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2585 struct ftrace_stack {
2586         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2587 };
2588
2589 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2590 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2591
2592 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2593                                  unsigned long flags,
2594                                  int skip, int pc, struct pt_regs *regs)
2595 {
2596         struct trace_event_call *call = &event_kernel_stack;
2597         struct ring_buffer_event *event;
2598         struct stack_entry *entry;
2599         struct stack_trace trace;
2600         int use_stack;
2601         int size = FTRACE_STACK_ENTRIES;
2602
2603         trace.nr_entries        = 0;
2604         trace.skip              = skip;
2605
2606         /*
2607          * Add two, for this function and the call to save_stack_trace().
2608          * If regs is set, then these functions will not be in the way.
2609          */
2610         if (!regs)
2611                 trace.skip += 2;
2612
2613         /*
2614          * Since events can happen in NMIs there's no safe way to
2615          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2616          * or NMI comes in, it will just have to use the default
2617          * FTRACE_STACK_SIZE.
2618          */
2619         preempt_disable_notrace();
2620
2621         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2622         /*
2623          * We don't need any atomic variables, just a barrier.
2624          * If an interrupt comes in, we don't care, because it would
2625          * have exited and put the counter back to what we want.
2626          * We just need a barrier to keep gcc from moving things
2627          * around.
2628          */
2629         barrier();
2630         if (use_stack == 1) {
2631                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2632                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2633
2634                 if (regs)
2635                         save_stack_trace_regs(regs, &trace);
2636                 else
2637                         save_stack_trace(&trace);
2638
2639                 if (trace.nr_entries > size)
2640                         size = trace.nr_entries;
2641         } else
2642                 /* From now on, use_stack is a boolean */
2643                 use_stack = 0;
2644
2645         size *= sizeof(unsigned long);
2646
2647         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2648                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2649                                     flags, pc);
2650         if (!event)
2651                 goto out;
2652         entry = ring_buffer_event_data(event);
2653
2654         memset(&entry->caller, 0, size);
2655
2656         if (use_stack)
2657                 memcpy(&entry->caller, trace.entries,
2658                        trace.nr_entries * sizeof(unsigned long));
2659         else {
2660                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2661                 trace.entries           = entry->caller;
2662                 if (regs)
2663                         save_stack_trace_regs(regs, &trace);
2664                 else
2665                         save_stack_trace(&trace);
2666         }
2667
2668         entry->size = trace.nr_entries;
2669
2670         if (!call_filter_check_discard(call, entry, buffer, event))
2671                 __buffer_unlock_commit(buffer, event);
2672
2673  out:
2674         /* Again, don't let gcc optimize things here */
2675         barrier();
2676         __this_cpu_dec(ftrace_stack_reserve);
2677         preempt_enable_notrace();
2678
2679 }
2680
2681 static inline void ftrace_trace_stack(struct trace_array *tr,
2682                                       struct ring_buffer *buffer,
2683                                       unsigned long flags,
2684                                       int skip, int pc, struct pt_regs *regs)
2685 {
2686         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2687                 return;
2688
2689         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2690 }
2691
2692 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2693                    int pc)
2694 {
2695         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2696
2697         if (rcu_is_watching()) {
2698                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2699                 return;
2700         }
2701
2702         /*
2703          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2704          * but if the above rcu_is_watching() failed, then the NMI
2705          * triggered someplace critical, and rcu_irq_enter() should
2706          * not be called from NMI.
2707          */
2708         if (unlikely(in_nmi()))
2709                 return;
2710
2711         /*
2712          * It is possible that a function is being traced in a
2713          * location that RCU is not watching. A call to
2714          * rcu_irq_enter() will make sure that it is, but there's
2715          * a few internal rcu functions that could be traced
2716          * where that won't work either. In those cases, we just
2717          * do nothing.
2718          */
2719         if (unlikely(rcu_irq_enter_disabled()))
2720                 return;
2721
2722         rcu_irq_enter_irqson();
2723         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2724         rcu_irq_exit_irqson();
2725 }
2726
2727 /**
2728  * trace_dump_stack - record a stack back trace in the trace buffer
2729  * @skip: Number of functions to skip (helper handlers)
2730  */
2731 void trace_dump_stack(int skip)
2732 {
2733         unsigned long flags;
2734
2735         if (tracing_disabled || tracing_selftest_running)
2736                 return;
2737
2738         local_save_flags(flags);
2739
2740         /*
2741          * Skip 3 more, which seems to get us to the caller of
2742          * this function.
2743          */
2744         skip += 3;
2745         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2746                              flags, skip, preempt_count(), NULL);
2747 }
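
/*
 * Illustrative sketch (not part of this file): trace_dump_stack() can
 * be dropped into a suspect code path to leave a kernel stack trace in
 * the ring buffer instead of spamming the console:
 *
 *	if (looks_wrong)		// hypothetical condition
 *		trace_dump_stack(0);
 */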
2748
2749 static DEFINE_PER_CPU(int, user_stack_count);
2750
2751 void
2752 ftrace_trace_userstack(struct trace_array *tr,
2753                        struct ring_buffer *buffer, unsigned long flags, int pc)
2754 {
2755         struct trace_event_call *call = &event_user_stack;
2756         struct ring_buffer_event *event;
2757         struct userstack_entry *entry;
2758         struct stack_trace trace;
2759
2760         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2761                 return;
2762
2763         /*
2764          * NMIs cannot handle page faults, even with fixups.
2765          * Saving the user stack can (and often does) fault.
2766          */
2767         if (unlikely(in_nmi()))
2768                 return;
2769
2770         /*
2771          * prevent recursion, since the user stack tracing may
2772          * trigger other kernel events.
2773          */
2774         preempt_disable();
2775         if (__this_cpu_read(user_stack_count))
2776                 goto out;
2777
2778         __this_cpu_inc(user_stack_count);
2779
2780         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2781                                             sizeof(*entry), flags, pc);
2782         if (!event)
2783                 goto out_drop_count;
2784         entry   = ring_buffer_event_data(event);
2785
2786         entry->tgid             = current->tgid;
2787         memset(&entry->caller, 0, sizeof(entry->caller));
2788
2789         trace.nr_entries        = 0;
2790         trace.max_entries       = FTRACE_STACK_ENTRIES;
2791         trace.skip              = 0;
2792         trace.entries           = entry->caller;
2793
2794         save_stack_trace_user(&trace);
2795         if (!call_filter_check_discard(call, entry, buffer, event))
2796                 __buffer_unlock_commit(buffer, event);
2797
2798  out_drop_count:
2799         __this_cpu_dec(user_stack_count);
2800  out:
2801         preempt_enable();
2802 }
2803
2804 #ifdef UNUSED
2805 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2806 {
2807         ftrace_trace_userstack(tr, flags, preempt_count());
2808 }
2809 #endif /* UNUSED */
2810
2811 #endif /* CONFIG_STACKTRACE */
2812
2813 /* created for use with alloc_percpu */
2814 struct trace_buffer_struct {
2815         int nesting;
2816         char buffer[4][TRACE_BUF_SIZE];
2817 };
2818
2819 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2820
2821 /*
2822  * This allows for lockless recording.  If we're nested too deeply, then
2823  * this returns NULL.
2824  */
2825 static char *get_trace_buf(void)
2826 {
2827         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2828
2829         if (!trace_percpu_buffer || buffer->nesting >= 4)
2830                 return NULL;
2831
2832         buffer->nesting++;
2833
2834         /* Interrupts must see nesting incremented before we use the buffer */
2835         barrier();
2836         return &buffer->buffer[buffer->nesting - 1][0];
2837 }
2838
2839 static void put_trace_buf(void)
2840 {
2841         /* Don't let the decrement of nesting leak before this */
2842         barrier();
2843         this_cpu_dec(trace_percpu_buffer->nesting);
2844 }
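
/*
 * Illustrative sketch (not part of this file): users of the per-cpu
 * buffer pair get_trace_buf()/put_trace_buf() with preemption disabled
 * and handle the NULL (nested too deeply) case:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * which is the pattern trace_vbprintk() and __trace_array_vprintk()
 * below follow.
 */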
2845
2846 static int alloc_percpu_trace_buffer(void)
2847 {
2848         struct trace_buffer_struct __percpu *buffers;
2849
2850         buffers = alloc_percpu(struct trace_buffer_struct);
2851         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2852                 return -ENOMEM;
2853
2854         trace_percpu_buffer = buffers;
2855         return 0;
2856 }
2857
2858 static int buffers_allocated;
2859
2860 void trace_printk_init_buffers(void)
2861 {
2862         if (buffers_allocated)
2863                 return;
2864
2865         if (alloc_percpu_trace_buffer())
2866                 return;
2867
2868         /* trace_printk() is for debug use only. Don't use it in production. */
2869
2870         pr_warn("\n");
2871         pr_warn("**********************************************************\n");
2872         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2873         pr_warn("**                                                      **\n");
2874         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2875         pr_warn("**                                                      **\n");
2876         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2877         pr_warn("** unsafe for production use.                           **\n");
2878         pr_warn("**                                                      **\n");
2879         pr_warn("** If you see this message and you are not debugging    **\n");
2880         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2881         pr_warn("**                                                      **\n");
2882         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2883         pr_warn("**********************************************************\n");
2884
2885         /* Expand the buffers to set size */
2886         tracing_update_buffers();
2887
2888         buffers_allocated = 1;
2889
2890         /*
2891          * trace_printk_init_buffers() can be called by modules.
2892          * If that happens, then we need to start cmdline recording
2893          * directly here. If global_trace.trace_buffer.buffer is already
2894          * allocated here, then this was called by module code.
2895          */
2896         if (global_trace.trace_buffer.buffer)
2897                 tracing_start_cmdline_record();
2898 }
2899
2900 void trace_printk_start_comm(void)
2901 {
2902         /* Start tracing comms if trace printk is set */
2903         if (!buffers_allocated)
2904                 return;
2905         tracing_start_cmdline_record();
2906 }
2907
2908 static void trace_printk_start_stop_comm(int enabled)
2909 {
2910         if (!buffers_allocated)
2911                 return;
2912
2913         if (enabled)
2914                 tracing_start_cmdline_record();
2915         else
2916                 tracing_stop_cmdline_record();
2917 }
2918
2919 /**
2920  * trace_vbprintk - write binary msg to tracing buffer
2921  *
2922  */
2923 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2924 {
2925         struct trace_event_call *call = &event_bprint;
2926         struct ring_buffer_event *event;
2927         struct ring_buffer *buffer;
2928         struct trace_array *tr = &global_trace;
2929         struct bprint_entry *entry;
2930         unsigned long flags;
2931         char *tbuffer;
2932         int len = 0, size, pc;
2933
2934         if (unlikely(tracing_selftest_running || tracing_disabled))
2935                 return 0;
2936
2937         /* Don't pollute graph traces with trace_vprintk internals */
2938         pause_graph_tracing();
2939
2940         pc = preempt_count();
2941         preempt_disable_notrace();
2942
2943         tbuffer = get_trace_buf();
2944         if (!tbuffer) {
2945                 len = 0;
2946                 goto out_nobuffer;
2947         }
2948
2949         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2950
2951         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2952                 goto out;
2953
2954         local_save_flags(flags);
2955         size = sizeof(*entry) + sizeof(u32) * len;
2956         buffer = tr->trace_buffer.buffer;
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2958                                             flags, pc);
2959         if (!event)
2960                 goto out;
2961         entry = ring_buffer_event_data(event);
2962         entry->ip                       = ip;
2963         entry->fmt                      = fmt;
2964
2965         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2966         if (!call_filter_check_discard(call, entry, buffer, event)) {
2967                 __buffer_unlock_commit(buffer, event);
2968                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2969         }
2970
2971 out:
2972         put_trace_buf();
2973
2974 out_nobuffer:
2975         preempt_enable_notrace();
2976         unpause_graph_tracing();
2977
2978         return len;
2979 }
2980 EXPORT_SYMBOL_GPL(trace_vbprintk);
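
/*
 * Illustrative sketch (not part of this file): callers normally do not
 * invoke trace_vbprintk() directly; the trace_printk() macro from
 * <linux/kernel.h> routes to this binary path when the format string is
 * a build-time constant:
 *
 *	trace_printk("reached %s with val=%d\n", __func__, val);
 *
 * where val is a hypothetical local variable.
 */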
2981
2982 __printf(3, 0)
2983 static int
2984 __trace_array_vprintk(struct ring_buffer *buffer,
2985                       unsigned long ip, const char *fmt, va_list args)
2986 {
2987         struct trace_event_call *call = &event_print;
2988         struct ring_buffer_event *event;
2989         int len = 0, size, pc;
2990         struct print_entry *entry;
2991         unsigned long flags;
2992         char *tbuffer;
2993
2994         if (tracing_disabled || tracing_selftest_running)
2995                 return 0;
2996
2997         /* Don't pollute graph traces with trace_vprintk internals */
2998         pause_graph_tracing();
2999
3000         pc = preempt_count();
3001         preempt_disable_notrace();
3002
3003
3004         tbuffer = get_trace_buf();
3005         if (!tbuffer) {
3006                 len = 0;
3007                 goto out_nobuffer;
3008         }
3009
3010         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3011
3012         local_save_flags(flags);
3013         size = sizeof(*entry) + len + 1;
3014         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3015                                             flags, pc);
3016         if (!event)
3017                 goto out;
3018         entry = ring_buffer_event_data(event);
3019         entry->ip = ip;
3020
3021         memcpy(&entry->buf, tbuffer, len + 1);
3022         if (!call_filter_check_discard(call, entry, buffer, event)) {
3023                 __buffer_unlock_commit(buffer, event);
3024                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3025         }
3026
3027 out:
3028         put_trace_buf();
3029
3030 out_nobuffer:
3031         preempt_enable_notrace();
3032         unpause_graph_tracing();
3033
3034         return len;
3035 }
3036
3037 __printf(3, 0)
3038 int trace_array_vprintk(struct trace_array *tr,
3039                         unsigned long ip, const char *fmt, va_list args)
3040 {
3041         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3042 }
3043
3044 __printf(3, 0)
3045 int trace_array_printk(struct trace_array *tr,
3046                        unsigned long ip, const char *fmt, ...)
3047 {
3048         int ret;
3049         va_list ap;
3050
3051         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052                 return 0;
3053
3054         if (!tr)
3055                 return -ENOENT;
3056
3057         va_start(ap, fmt);
3058         ret = trace_array_vprintk(tr, ip, fmt, ap);
3059         va_end(ap);
3060         return ret;
3061 }
3062
3063 __printf(3, 4)
3064 int trace_array_printk_buf(struct ring_buffer *buffer,
3065                            unsigned long ip, const char *fmt, ...)
3066 {
3067         int ret;
3068         va_list ap;
3069
3070         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3071                 return 0;
3072
3073         va_start(ap, fmt);
3074         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3075         va_end(ap);
3076         return ret;
3077 }
3078
3079 __printf(2, 0)
3080 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082         return trace_array_vprintk(&global_trace, ip, fmt, args);
3083 }
3084 EXPORT_SYMBOL_GPL(trace_vprintk);
3085
3086 static void trace_iterator_increment(struct trace_iterator *iter)
3087 {
3088         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3089
3090         iter->idx++;
3091         if (buf_iter)
3092                 ring_buffer_read(buf_iter, NULL);
3093 }
3094
3095 static struct trace_entry *
3096 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3097                 unsigned long *lost_events)
3098 {
3099         struct ring_buffer_event *event;
3100         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3101
3102         if (buf_iter)
3103                 event = ring_buffer_iter_peek(buf_iter, ts);
3104         else
3105                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3106                                          lost_events);
3107
3108         if (event) {
3109                 iter->ent_size = ring_buffer_event_length(event);
3110                 return ring_buffer_event_data(event);
3111         }
3112         iter->ent_size = 0;
3113         return NULL;
3114 }
3115
3116 static struct trace_entry *
3117 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3118                   unsigned long *missing_events, u64 *ent_ts)
3119 {
3120         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3121         struct trace_entry *ent, *next = NULL;
3122         unsigned long lost_events = 0, next_lost = 0;
3123         int cpu_file = iter->cpu_file;
3124         u64 next_ts = 0, ts;
3125         int next_cpu = -1;
3126         int next_size = 0;
3127         int cpu;
3128
3129         /*
3130          * If we are in a per_cpu trace file, don't bother iterating over
3131          * all cpus; just peek at that cpu directly.
3132          */
3133         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3134                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3135                         return NULL;
3136                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3137                 if (ent_cpu)
3138                         *ent_cpu = cpu_file;
3139
3140                 return ent;
3141         }
3142
3143         for_each_tracing_cpu(cpu) {
3144
3145                 if (ring_buffer_empty_cpu(buffer, cpu))
3146                         continue;
3147
3148                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3149
3150                 /*
3151                  * Pick the entry with the smallest timestamp:
3152                  */
3153                 if (ent && (!next || ts < next_ts)) {
3154                         next = ent;
3155                         next_cpu = cpu;
3156                         next_ts = ts;
3157                         next_lost = lost_events;
3158                         next_size = iter->ent_size;
3159                 }
3160         }
3161
3162         iter->ent_size = next_size;
3163
3164         if (ent_cpu)
3165                 *ent_cpu = next_cpu;
3166
3167         if (ent_ts)
3168                 *ent_ts = next_ts;
3169
3170         if (missing_events)
3171                 *missing_events = next_lost;
3172
3173         return next;
3174 }
3175
3176 /* Find the next real entry, without updating the iterator itself */
3177 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3178                                           int *ent_cpu, u64 *ent_ts)
3179 {
3180         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3181 }
3182
3183 /* Find the next real entry, and increment the iterator to the next entry */
3184 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3185 {
3186         iter->ent = __find_next_entry(iter, &iter->cpu,
3187                                       &iter->lost_events, &iter->ts);
3188
3189         if (iter->ent)
3190                 trace_iterator_increment(iter);
3191
3192         return iter->ent ? iter : NULL;
3193 }
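
/*
 * Illustrative sketch (not part of this file): a reader that walks the
 * merged, timestamp-ordered stream simply loops on
 * trace_find_next_entry_inc():
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);	// or any other consumer
 *
 * assuming iter is an already initialized struct trace_iterator.
 */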
3194
3195 static void trace_consume(struct trace_iterator *iter)
3196 {
3197         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3198                             &iter->lost_events);
3199 }
3200
3201 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3202 {
3203         struct trace_iterator *iter = m->private;
3204         int i = (int)*pos;
3205         void *ent;
3206
3207         WARN_ON_ONCE(iter->leftover);
3208
3209         (*pos)++;
3210
3211         /* can't go backwards */
3212         if (iter->idx > i)
3213                 return NULL;
3214
3215         if (iter->idx < 0)
3216                 ent = trace_find_next_entry_inc(iter);
3217         else
3218                 ent = iter;
3219
3220         while (ent && iter->idx < i)
3221                 ent = trace_find_next_entry_inc(iter);
3222
3223         iter->pos = *pos;
3224
3225         return ent;
3226 }
3227
3228 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3229 {
3230         struct ring_buffer_event *event;
3231         struct ring_buffer_iter *buf_iter;
3232         unsigned long entries = 0;
3233         u64 ts;
3234
3235         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3236
3237         buf_iter = trace_buffer_iter(iter, cpu);
3238         if (!buf_iter)
3239                 return;
3240
3241         ring_buffer_iter_reset(buf_iter);
3242
3243         /*
3244          * With the max latency tracers, it is possible that a reset
3245          * never took place on a cpu. This is evident from the
3246          * timestamp being before the start of the buffer.
3247          */
3248         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3249                 if (ts >= iter->trace_buffer->time_start)
3250                         break;
3251                 entries++;
3252                 ring_buffer_read(buf_iter, NULL);
3253         }
3254
3255         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3256 }
3257
3258 /*
3259  * The current tracer is copied to avoid global locking
3260  * all around.
3261  */
3262 static void *s_start(struct seq_file *m, loff_t *pos)
3263 {
3264         struct trace_iterator *iter = m->private;
3265         struct trace_array *tr = iter->tr;
3266         int cpu_file = iter->cpu_file;
3267         void *p = NULL;
3268         loff_t l = 0;
3269         int cpu;
3270
3271         /*
3272          * Copy the tracer to avoid using a global lock all around.
3273          * iter->trace is a copy of current_trace; the pointer to the
3274          * name may be used instead of a strcmp(), as iter->trace->name
3275          * will point to the same string as current_trace->name.
3276          */
3277         mutex_lock(&trace_types_lock);
3278         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3279                 *iter->trace = *tr->current_trace;
3280         mutex_unlock(&trace_types_lock);
3281
3282 #ifdef CONFIG_TRACER_MAX_TRACE
3283         if (iter->snapshot && iter->trace->use_max_tr)
3284                 return ERR_PTR(-EBUSY);
3285 #endif
3286
3287         if (*pos != iter->pos) {
3288                 iter->ent = NULL;
3289                 iter->cpu = 0;
3290                 iter->idx = -1;
3291
3292                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3293                         for_each_tracing_cpu(cpu)
3294                                 tracing_iter_reset(iter, cpu);
3295                 } else
3296                         tracing_iter_reset(iter, cpu_file);
3297
3298                 iter->leftover = 0;
3299                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3300                         ;
3301
3302         } else {
3303                 /*
3304                  * If we overflowed the seq_file before, then we want
3305                  * to just reuse the trace_seq buffer again.
3306                  */
3307                 if (iter->leftover)
3308                         p = iter;
3309                 else {
3310                         l = *pos - 1;
3311                         p = s_next(m, p, &l);
3312                 }
3313         }
3314
3315         trace_event_read_lock();
3316         trace_access_lock(cpu_file);
3317         return p;
3318 }
3319
3320 static void s_stop(struct seq_file *m, void *p)
3321 {
3322         struct trace_iterator *iter = m->private;
3323
3324 #ifdef CONFIG_TRACER_MAX_TRACE
3325         if (iter->snapshot && iter->trace->use_max_tr)
3326                 return;
3327 #endif
3328
3329         trace_access_unlock(iter->cpu_file);
3330         trace_event_read_unlock();
3331 }
3332
3333 static void
3334 get_total_entries(struct trace_buffer *buf,
3335                   unsigned long *total, unsigned long *entries)
3336 {
3337         unsigned long count;
3338         int cpu;
3339
3340         *total = 0;
3341         *entries = 0;
3342
3343         for_each_tracing_cpu(cpu) {
3344                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3345                 /*
3346                  * If this buffer has skipped entries, then we hold all
3347                  * entries for the trace and we need to ignore the
3348                  * ones before the time stamp.
3349                  */
3350                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3351                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3352                         /* total is the same as the entries */
3353                         *total += count;
3354                 } else
3355                         *total += count +
3356                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3357                 *entries += count;
3358         }
3359 }
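/*
 * Summary of get_total_entries(): *entries counts what is still readable
 * in the buffer, while *total additionally counts events that were lost
 * to ring buffer overruns, except on CPUs where early entries were
 * deliberately skipped by tracing_iter_reset() (there total == entries).
 */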
3360
3361 static void print_lat_help_header(struct seq_file *m)
3362 {
3363         seq_puts(m, "#                  _------=> CPU#            \n"
3364                     "#                 / _-----=> irqs-off        \n"
3365                     "#                | / _----=> need-resched    \n"
3366                     "#                || / _---=> hardirq/softirq \n"
3367                     "#                ||| / _--=> preempt-depth   \n"
3368                     "#                |||| /     delay            \n"
3369                     "#  cmd     pid   ||||| time  |   caller      \n"
3370                     "#     \\   /      |||||  \\    |   /         \n");
3371 }
3372
3373 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3374 {
3375         unsigned long total;
3376         unsigned long entries;
3377
3378         get_total_entries(buf, &total, &entries);
3379         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3380                    entries, total, num_online_cpus());
3381         seq_puts(m, "#\n");
3382 }
3383
3384 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3385                                    unsigned int flags)
3386 {
3387         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3388
3389         print_event_info(buf, m);
3390
3391         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3392         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3393 }
3394
3395 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3396                                        unsigned int flags)
3397 {
3398         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3399         const char tgid_space[] = "          ";
3400         const char space[] = "  ";
3401
3402         print_event_info(buf, m);
3403
3404         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3405                    tgid ? tgid_space : space);
3406         seq_printf(m, "#                          %s / _----=> need-resched\n",
3407                    tgid ? tgid_space : space);
3408         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3409                    tgid ? tgid_space : space);
3410         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3411                    tgid ? tgid_space : space);
3412         seq_printf(m, "#                          %s||| /     delay\n",
3413                    tgid ? tgid_space : space);
3414         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3415                    tgid ? "   TGID   " : space);
3416         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3417                    tgid ? "     |    " : space);
3418 }
3419
3420 void
3421 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3422 {
3423         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3424         struct trace_buffer *buf = iter->trace_buffer;
3425         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3426         struct tracer *type = iter->trace;
3427         unsigned long entries;
3428         unsigned long total;
3429         const char *name = "preemption";
3430
3431         name = type->name;
3432
3433         get_total_entries(buf, &total, &entries);
3434
3435         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3436                    name, UTS_RELEASE);
3437         seq_puts(m, "# -----------------------------------"
3438                  "---------------------------------\n");
3439         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3440                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3441                    nsecs_to_usecs(data->saved_latency),
3442                    entries,
3443                    total,
3444                    buf->cpu,
3445 #if defined(CONFIG_PREEMPT_NONE)
3446                    "server",
3447 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3448                    "desktop",
3449 #elif defined(CONFIG_PREEMPT)
3450                    "preempt",
3451 #else
3452                    "unknown",
3453 #endif
3454                    /* These are reserved for later use */
3455                    0, 0, 0, 0);
3456 #ifdef CONFIG_SMP
3457         seq_printf(m, " #P:%d)\n", num_online_cpus());
3458 #else
3459         seq_puts(m, ")\n");
3460 #endif
3461         seq_puts(m, "#    -----------------\n");
3462         seq_printf(m, "#    | task: %.16s-%d "
3463                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3464                    data->comm, data->pid,
3465                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3466                    data->policy, data->rt_priority);
3467         seq_puts(m, "#    -----------------\n");
3468
3469         if (data->critical_start) {
3470                 seq_puts(m, "#  => started at: ");
3471                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3472                 trace_print_seq(m, &iter->seq);
3473                 seq_puts(m, "\n#  => ended at:   ");
3474                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3475                 trace_print_seq(m, &iter->seq);
3476                 seq_puts(m, "\n#\n");
3477         }
3478
3479         seq_puts(m, "#\n");
3480 }
3481
3482 static void test_cpu_buff_start(struct trace_iterator *iter)
3483 {
3484         struct trace_seq *s = &iter->seq;
3485         struct trace_array *tr = iter->tr;
3486
3487         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3488                 return;
3489
3490         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3491                 return;
3492
3493         if (cpumask_available(iter->started) &&
3494             cpumask_test_cpu(iter->cpu, iter->started))
3495                 return;
3496
3497         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3498                 return;
3499
3500         if (cpumask_available(iter->started))
3501                 cpumask_set_cpu(iter->cpu, iter->started);
3502
3503         /* Don't print the "buffer started" annotation for the first entry of the trace */
3504         if (iter->idx > 1)
3505                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3506                                 iter->cpu);
3507 }
3508
3509 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3510 {
3511         struct trace_array *tr = iter->tr;
3512         struct trace_seq *s = &iter->seq;
3513         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3514         struct trace_entry *entry;
3515         struct trace_event *event;
3516
3517         entry = iter->ent;
3518
3519         test_cpu_buff_start(iter);
3520
3521         event = ftrace_find_event(entry->type);
3522
3523         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3524                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3525                         trace_print_lat_context(iter);
3526                 else
3527                         trace_print_context(iter);
3528         }
3529
3530         if (trace_seq_has_overflowed(s))
3531                 return TRACE_TYPE_PARTIAL_LINE;
3532
3533         if (event)
3534                 return event->funcs->trace(iter, sym_flags, event);
3535
3536         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3537
3538         return trace_handle_return(s);
3539 }
3540
3541 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3542 {
3543         struct trace_array *tr = iter->tr;
3544         struct trace_seq *s = &iter->seq;
3545         struct trace_entry *entry;
3546         struct trace_event *event;
3547
3548         entry = iter->ent;
3549
3550         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3551                 trace_seq_printf(s, "%d %d %llu ",
3552                                  entry->pid, iter->cpu, iter->ts);
3553
3554         if (trace_seq_has_overflowed(s))
3555                 return TRACE_TYPE_PARTIAL_LINE;
3556
3557         event = ftrace_find_event(entry->type);
3558         if (event)
3559                 return event->funcs->raw(iter, 0, event);
3560
3561         trace_seq_printf(s, "%d ?\n", entry->type);
3562
3563         return trace_handle_return(s);
3564 }
3565
3566 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3567 {
3568         struct trace_array *tr = iter->tr;
3569         struct trace_seq *s = &iter->seq;
3570         unsigned char newline = '\n';
3571         struct trace_entry *entry;
3572         struct trace_event *event;
3573
3574         entry = iter->ent;
3575
3576         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3577                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3578                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3579                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3580                 if (trace_seq_has_overflowed(s))
3581                         return TRACE_TYPE_PARTIAL_LINE;
3582         }
3583
3584         event = ftrace_find_event(entry->type);
3585         if (event) {
3586                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3587                 if (ret != TRACE_TYPE_HANDLED)
3588                         return ret;
3589         }
3590
3591         SEQ_PUT_FIELD(s, newline);
3592
3593         return trace_handle_return(s);
3594 }
3595
3596 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3597 {
3598         struct trace_array *tr = iter->tr;
3599         struct trace_seq *s = &iter->seq;
3600         struct trace_entry *entry;
3601         struct trace_event *event;
3602
3603         entry = iter->ent;
3604
3605         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3606                 SEQ_PUT_FIELD(s, entry->pid);
3607                 SEQ_PUT_FIELD(s, iter->cpu);
3608                 SEQ_PUT_FIELD(s, iter->ts);
3609                 if (trace_seq_has_overflowed(s))
3610                         return TRACE_TYPE_PARTIAL_LINE;
3611         }
3612
3613         event = ftrace_find_event(entry->type);
3614         return event ? event->funcs->binary(iter, 0, event) :
3615                 TRACE_TYPE_HANDLED;
3616 }
3617
3618 int trace_empty(struct trace_iterator *iter)
3619 {
3620         struct ring_buffer_iter *buf_iter;
3621         int cpu;
3622
3623         /* If we are looking at one CPU buffer, only check that one */
3624         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3625                 cpu = iter->cpu_file;
3626                 buf_iter = trace_buffer_iter(iter, cpu);
3627                 if (buf_iter) {
3628                         if (!ring_buffer_iter_empty(buf_iter))
3629                                 return 0;
3630                 } else {
3631                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3632                                 return 0;
3633                 }
3634                 return 1;
3635         }
3636
3637         for_each_tracing_cpu(cpu) {
3638                 buf_iter = trace_buffer_iter(iter, cpu);
3639                 if (buf_iter) {
3640                         if (!ring_buffer_iter_empty(buf_iter))
3641                                 return 0;
3642                 } else {
3643                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3644                                 return 0;
3645                 }
3646         }
3647
3648         return 1;
3649 }
3650
3651 /*  Called with trace_event_read_lock() held. */
3652 enum print_line_t print_trace_line(struct trace_iterator *iter)
3653 {
3654         struct trace_array *tr = iter->tr;
3655         unsigned long trace_flags = tr->trace_flags;
3656         enum print_line_t ret;
3657
3658         if (iter->lost_events) {
3659                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3660                                  iter->cpu, iter->lost_events);
3661                 if (trace_seq_has_overflowed(&iter->seq))
3662                         return TRACE_TYPE_PARTIAL_LINE;
3663         }
3664
3665         if (iter->trace && iter->trace->print_line) {
3666                 ret = iter->trace->print_line(iter);
3667                 if (ret != TRACE_TYPE_UNHANDLED)
3668                         return ret;
3669         }
3670
3671         if (iter->ent->type == TRACE_BPUTS &&
3672                         trace_flags & TRACE_ITER_PRINTK &&
3673                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3674                 return trace_print_bputs_msg_only(iter);
3675
3676         if (iter->ent->type == TRACE_BPRINT &&
3677                         trace_flags & TRACE_ITER_PRINTK &&
3678                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3679                 return trace_print_bprintk_msg_only(iter);
3680
3681         if (iter->ent->type == TRACE_PRINT &&
3682                         trace_flags & TRACE_ITER_PRINTK &&
3683                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3684                 return trace_print_printk_msg_only(iter);
3685
3686         if (trace_flags & TRACE_ITER_BIN)
3687                 return print_bin_fmt(iter);
3688
3689         if (trace_flags & TRACE_ITER_HEX)
3690                 return print_hex_fmt(iter);
3691
3692         if (trace_flags & TRACE_ITER_RAW)
3693                 return print_raw_fmt(iter);
3694
3695         return print_trace_fmt(iter);
3696 }
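/*
 * Dispatch order used above: the tracer's own print_line() callback gets
 * the first chance at the entry, then the printk-msg-only shortcuts,
 * then the bin/hex/raw output flags, and finally the default formatted
 * output via print_trace_fmt().
 */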
3697
3698 void trace_latency_header(struct seq_file *m)
3699 {
3700         struct trace_iterator *iter = m->private;
3701         struct trace_array *tr = iter->tr;
3702
3703         /* print nothing if the buffers are empty */
3704         if (trace_empty(iter))
3705                 return;
3706
3707         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3708                 print_trace_header(m, iter);
3709
3710         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3711                 print_lat_help_header(m);
3712 }
3713
3714 void trace_default_header(struct seq_file *m)
3715 {
3716         struct trace_iterator *iter = m->private;
3717         struct trace_array *tr = iter->tr;
3718         unsigned long trace_flags = tr->trace_flags;
3719
3720         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3721                 return;
3722
3723         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3724                 /* print nothing if the buffers are empty */
3725                 if (trace_empty(iter))
3726                         return;
3727                 print_trace_header(m, iter);
3728                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3729                         print_lat_help_header(m);
3730         } else {
3731                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3732                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3733                                 print_func_help_header_irq(iter->trace_buffer,
3734                                                            m, trace_flags);
3735                         else
3736                                 print_func_help_header(iter->trace_buffer, m,
3737                                                        trace_flags);
3738                 }
3739         }
3740 }
3741
3742 static void test_ftrace_alive(struct seq_file *m)
3743 {
3744         if (!ftrace_is_dead())
3745                 return;
3746         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3747                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3748 }
3749
3750 #ifdef CONFIG_TRACER_MAX_TRACE
3751 static void show_snapshot_main_help(struct seq_file *m)
3752 {
3753         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3754                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3755                     "#                      Takes a snapshot of the main buffer.\n"
3756                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3757                     "#                      (Doesn't have to be '2', works with any number that\n"
3758                     "#                       is not a '0' or '1')\n");
3759 }
3760
3761 static void show_snapshot_percpu_help(struct seq_file *m)
3762 {
3763         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3764 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3765         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3766                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3767 #else
3768         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3769                     "#                     Must use main snapshot file to allocate.\n");
3770 #endif
3771         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3772                     "#                      (Doesn't have to be '2', works with any number that\n"
3773                     "#                       is not a '0' or '1')\n");
3774 }
3775
3776 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3777 {
3778         if (iter->tr->allocated_snapshot)
3779                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3780         else
3781                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3782
3783         seq_puts(m, "# Snapshot commands:\n");
3784         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3785                 show_snapshot_main_help(m);
3786         else
3787                 show_snapshot_percpu_help(m);
3788 }
3789 #else
3790 /* Should never be called */
3791 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3792 #endif
3793
3794 static int s_show(struct seq_file *m, void *v)
3795 {
3796         struct trace_iterator *iter = v;
3797         int ret;
3798
3799         if (iter->ent == NULL) {
3800                 if (iter->tr) {
3801                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3802                         seq_puts(m, "#\n");
3803                         test_ftrace_alive(m);
3804                 }
3805                 if (iter->snapshot && trace_empty(iter))
3806                         print_snapshot_help(m, iter);
3807                 else if (iter->trace && iter->trace->print_header)
3808                         iter->trace->print_header(m);
3809                 else
3810                         trace_default_header(m);
3811
3812         } else if (iter->leftover) {
3813                 /*
3814                  * If we filled the seq_file buffer earlier, we
3815                  * want to just show it now.
3816                  */
3817                 ret = trace_print_seq(m, &iter->seq);
3818
3819                 /* ret should this time be zero, but you never know */
3820                 iter->leftover = ret;
3821
3822         } else {
3823                 print_trace_line(iter);
3824                 ret = trace_print_seq(m, &iter->seq);
3825                 /*
3826                  * If we overflow the seq_file buffer, then it will
3827                  * ask us for this data again at start up.
3828                  * Use that instead.
3829                  *  ret is 0 if seq_file write succeeded.
3830                  *        -1 otherwise.
3831                  */
3832                 iter->leftover = ret;
3833         }
3834
3835         return 0;
3836 }
3837
3838 /*
3839  * Should be used after trace_array_get(), trace_types_lock
3840  * ensures that i_cdev was already initialized.
3841  */
3842 static inline int tracing_get_cpu(struct inode *inode)
3843 {
3844         if (inode->i_cdev) /* See trace_create_cpu_file() */
3845                 return (long)inode->i_cdev - 1;
3846         return RING_BUFFER_ALL_CPUS;
3847 }
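/*
 * The per-cpu trace files stash (cpu + 1) in i_cdev when they are
 * created (see trace_create_cpu_file()), so a NULL i_cdev means the top
 * level file covering all CPUs.  For example, the per_cpu/cpu2 instance
 * of a file resolves to (long)(2 + 1) - 1 == 2 here.
 */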
3848
3849 static const struct seq_operations tracer_seq_ops = {
3850         .start          = s_start,
3851         .next           = s_next,
3852         .stop           = s_stop,
3853         .show           = s_show,
3854 };
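/*
 * The seq_file core drives the iterator above roughly as follows
 * (illustrative sketch only; the real loop lives in fs/seq_file.c and
 * also deals with buffer overflow and partial reads):
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 */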
3855
3856 static struct trace_iterator *
3857 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3858 {
3859         struct trace_array *tr = inode->i_private;
3860         struct trace_iterator *iter;
3861         int cpu;
3862
3863         if (tracing_disabled)
3864                 return ERR_PTR(-ENODEV);
3865
3866         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3867         if (!iter)
3868                 return ERR_PTR(-ENOMEM);
3869
3870         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3871                                     GFP_KERNEL);
3872         if (!iter->buffer_iter)
3873                 goto release;
3874
3875         /*
3876          * We make a copy of the current tracer to avoid concurrent
3877          * changes on it while we are reading.
3878          */
3879         mutex_lock(&trace_types_lock);
3880         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3881         if (!iter->trace)
3882                 goto fail;
3883
3884         *iter->trace = *tr->current_trace;
3885
3886         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3887                 goto fail;
3888
3889         iter->tr = tr;
3890
3891 #ifdef CONFIG_TRACER_MAX_TRACE
3892         /* Currently only the top directory has a snapshot */
3893         if (tr->current_trace->print_max || snapshot)
3894                 iter->trace_buffer = &tr->max_buffer;
3895         else
3896 #endif
3897                 iter->trace_buffer = &tr->trace_buffer;
3898         iter->snapshot = snapshot;
3899         iter->pos = -1;
3900         iter->cpu_file = tracing_get_cpu(inode);
3901         mutex_init(&iter->mutex);
3902
3903         /* Notify the tracer early; before we stop tracing. */
3904         if (iter->trace && iter->trace->open)
3905                 iter->trace->open(iter);
3906
3907         /* Annotate start of buffers if we had overruns */
3908         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3909                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3910
3911         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3912         if (trace_clocks[tr->clock_id].in_ns)
3913                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3914
3915         /* stop the trace while dumping if we are not opening "snapshot" */
3916         if (!iter->snapshot)
3917                 tracing_stop_tr(tr);
3918
3919         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3920                 for_each_tracing_cpu(cpu) {
3921                         iter->buffer_iter[cpu] =
3922                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3923                                                          cpu, GFP_KERNEL);
3924                 }
3925                 ring_buffer_read_prepare_sync();
3926                 for_each_tracing_cpu(cpu) {
3927                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3928                         tracing_iter_reset(iter, cpu);
3929                 }
3930         } else {
3931                 cpu = iter->cpu_file;
3932                 iter->buffer_iter[cpu] =
3933                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3934                                                  cpu, GFP_KERNEL);
3935                 ring_buffer_read_prepare_sync();
3936                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3937                 tracing_iter_reset(iter, cpu);
3938         }
3939
3940         mutex_unlock(&trace_types_lock);
3941
3942         return iter;
3943
3944  fail:
3945         mutex_unlock(&trace_types_lock);
3946         kfree(iter->trace);
3947         kfree(iter->buffer_iter);
3948 release:
3949         seq_release_private(inode, file);
3950         return ERR_PTR(-ENOMEM);
3951 }
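/*
 * Note: unless the "snapshot" file is being opened, __tracing_open()
 * stops tracing for the lifetime of the descriptor (tracing_stop_tr())
 * and tracing_release() restarts it, so the iterator walks a stable
 * buffer.
 */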
3952
3953 int tracing_open_generic(struct inode *inode, struct file *filp)
3954 {
3955         if (tracing_disabled)
3956                 return -ENODEV;
3957
3958         filp->private_data = inode->i_private;
3959         return 0;
3960 }
3961
3962 bool tracing_is_disabled(void)
3963 {
3964         return tracing_disabled ? true : false;
3965 }
3966
3967 /*
3968  * Open and update trace_array ref count.
3969  * Must have the current trace_array passed to it.
3970  */
3971 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3972 {
3973         struct trace_array *tr = inode->i_private;
3974
3975         if (tracing_disabled)
3976                 return -ENODEV;
3977
3978         if (trace_array_get(tr) < 0)
3979                 return -ENODEV;
3980
3981         filp->private_data = inode->i_private;
3982
3983         return 0;
3984 }
3985
3986 static int tracing_release(struct inode *inode, struct file *file)
3987 {
3988         struct trace_array *tr = inode->i_private;
3989         struct seq_file *m = file->private_data;
3990         struct trace_iterator *iter;
3991         int cpu;
3992
3993         if (!(file->f_mode & FMODE_READ)) {
3994                 trace_array_put(tr);
3995                 return 0;
3996         }
3997
3998         /* Writes do not use seq_file */
3999         iter = m->private;
4000         mutex_lock(&trace_types_lock);
4001
4002         for_each_tracing_cpu(cpu) {
4003                 if (iter->buffer_iter[cpu])
4004                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4005         }
4006
4007         if (iter->trace && iter->trace->close)
4008                 iter->trace->close(iter);
4009
4010         if (!iter->snapshot)
4011                 /* reenable tracing if it was previously enabled */
4012                 tracing_start_tr(tr);
4013
4014         __trace_array_put(tr);
4015
4016         mutex_unlock(&trace_types_lock);
4017
4018         mutex_destroy(&iter->mutex);
4019         free_cpumask_var(iter->started);
4020         kfree(iter->trace);
4021         kfree(iter->buffer_iter);
4022         seq_release_private(inode, file);
4023
4024         return 0;
4025 }
4026
4027 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4028 {
4029         struct trace_array *tr = inode->i_private;
4030
4031         trace_array_put(tr);
4032         return 0;
4033 }
4034
4035 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4036 {
4037         struct trace_array *tr = inode->i_private;
4038
4039         trace_array_put(tr);
4040
4041         return single_release(inode, file);
4042 }
4043
4044 static int tracing_open(struct inode *inode, struct file *file)
4045 {
4046         struct trace_array *tr = inode->i_private;
4047         struct trace_iterator *iter;
4048         int ret = 0;
4049
4050         if (trace_array_get(tr) < 0)
4051                 return -ENODEV;
4052
4053         /* If this file was open for write, then erase contents */
4054         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4055                 int cpu = tracing_get_cpu(inode);
4056                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4057
4058 #ifdef CONFIG_TRACER_MAX_TRACE
4059                 if (tr->current_trace->print_max)
4060                         trace_buf = &tr->max_buffer;
4061 #endif
4062
4063                 if (cpu == RING_BUFFER_ALL_CPUS)
4064                         tracing_reset_online_cpus(trace_buf);
4065                 else
4066                         tracing_reset(trace_buf, cpu);
4067         }
4068
4069         if (file->f_mode & FMODE_READ) {
4070                 iter = __tracing_open(inode, file, false);
4071                 if (IS_ERR(iter))
4072                         ret = PTR_ERR(iter);
4073                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4074                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4075         }
4076
4077         if (ret < 0)
4078                 trace_array_put(tr);
4079
4080         return ret;
4081 }
4082
4083 /*
4084  * Some tracers are not suitable for instance buffers.
4085  * A tracer is always available for the global array (toplevel)
4086  * or if it explicitly states that it is.
4087  */
4088 static bool
4089 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4090 {
4091         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4092 }
4093
4094 /* Find the next tracer that this trace array may use */
4095 static struct tracer *
4096 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4097 {
4098         while (t && !trace_ok_for_array(t, tr))
4099                 t = t->next;
4100
4101         return t;
4102 }
4103
4104 static void *
4105 t_next(struct seq_file *m, void *v, loff_t *pos)
4106 {
4107         struct trace_array *tr = m->private;
4108         struct tracer *t = v;
4109
4110         (*pos)++;
4111
4112         if (t)
4113                 t = get_tracer_for_array(tr, t->next);
4114
4115         return t;
4116 }
4117
4118 static void *t_start(struct seq_file *m, loff_t *pos)
4119 {
4120         struct trace_array *tr = m->private;
4121         struct tracer *t;
4122         loff_t l = 0;
4123
4124         mutex_lock(&trace_types_lock);
4125
4126         t = get_tracer_for_array(tr, trace_types);
4127         for (; t && l < *pos; t = t_next(m, t, &l))
4128                 ;
4129
4130         return t;
4131 }
4132
4133 static void t_stop(struct seq_file *m, void *p)
4134 {
4135         mutex_unlock(&trace_types_lock);
4136 }
4137
4138 static int t_show(struct seq_file *m, void *v)
4139 {
4140         struct tracer *t = v;
4141
4142         if (!t)
4143                 return 0;
4144
4145         seq_puts(m, t->name);
4146         if (t->next)
4147                 seq_putc(m, ' ');
4148         else
4149                 seq_putc(m, '\n');
4150
4151         return 0;
4152 }
4153
4154 static const struct seq_operations show_traces_seq_ops = {
4155         .start          = t_start,
4156         .next           = t_next,
4157         .stop           = t_stop,
4158         .show           = t_show,
4159 };
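/*
 * These seq ops back the "available_tracers" file (registered later in
 * this file); t_show() prints each usable tracer name separated by
 * spaces, e.g. (illustrative):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 */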
4160
4161 static int show_traces_open(struct inode *inode, struct file *file)
4162 {
4163         struct trace_array *tr = inode->i_private;
4164         struct seq_file *m;
4165         int ret;
4166
4167         if (tracing_disabled)
4168                 return -ENODEV;
4169
4170         if (trace_array_get(tr) < 0)
4171                 return -ENODEV;
4172
4173         ret = seq_open(file, &show_traces_seq_ops);
4174         if (ret) {
4175                 trace_array_put(tr);
4176                 return ret;
4177         }
4178
4179         m = file->private_data;
4180         m->private = tr;
4181
4182         return 0;
4183 }
4184
4185 static int show_traces_release(struct inode *inode, struct file *file)
4186 {
4187         struct trace_array *tr = inode->i_private;
4188
4189         trace_array_put(tr);
4190         return seq_release(inode, file);
4191 }
4192
4193 static ssize_t
4194 tracing_write_stub(struct file *filp, const char __user *ubuf,
4195                    size_t count, loff_t *ppos)
4196 {
4197         return count;
4198 }
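/*
 * Writes to the "trace" file are accepted but ignored here; it is the
 * O_TRUNC open path in tracing_open() that actually clears the buffer,
 * which is what makes "echo > trace" work as described in the readme
 * text below.
 */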
4199
4200 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4201 {
4202         int ret;
4203
4204         if (file->f_mode & FMODE_READ)
4205                 ret = seq_lseek(file, offset, whence);
4206         else
4207                 file->f_pos = ret = 0;
4208
4209         return ret;
4210 }
4211
4212 static const struct file_operations tracing_fops = {
4213         .open           = tracing_open,
4214         .read           = seq_read,
4215         .write          = tracing_write_stub,
4216         .llseek         = tracing_lseek,
4217         .release        = tracing_release,
4218 };
4219
4220 static const struct file_operations show_traces_fops = {
4221         .open           = show_traces_open,
4222         .read           = seq_read,
4223         .llseek         = seq_lseek,
4224         .release        = show_traces_release,
4225 };
4226
4227 static ssize_t
4228 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4229                      size_t count, loff_t *ppos)
4230 {
4231         struct trace_array *tr = file_inode(filp)->i_private;
4232         char *mask_str;
4233         int len;
4234
4235         len = snprintf(NULL, 0, "%*pb\n",
4236                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4237         mask_str = kmalloc(len, GFP_KERNEL);
4238         if (!mask_str)
4239                 return -ENOMEM;
4240
4241         len = snprintf(mask_str, len, "%*pb\n",
4242                        cpumask_pr_args(tr->tracing_cpumask));
4243         if (len >= count) {
4244                 count = -EINVAL;
4245                 goto out_err;
4246         }
4247         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4248
4249 out_err:
4250         kfree(mask_str);
4251
4252         return count;
4253 }
4254
4255 static ssize_t
4256 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4257                       size_t count, loff_t *ppos)
4258 {
4259         struct trace_array *tr = file_inode(filp)->i_private;
4260         cpumask_var_t tracing_cpumask_new;
4261         int err, cpu;
4262
4263         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4264                 return -ENOMEM;
4265
4266         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4267         if (err)
4268                 goto err_unlock;
4269
4270         local_irq_disable();
4271         arch_spin_lock(&tr->max_lock);
4272         for_each_tracing_cpu(cpu) {
4273                 /*
4274                  * Increase/decrease the disabled counter if we are
4275                  * about to flip a bit in the cpumask:
4276                  */
4277                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4278                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4279                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4280                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4281                 }
4282                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4283                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4284                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4285                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4286                 }
4287         }
4288         arch_spin_unlock(&tr->max_lock);
4289         local_irq_enable();
4290
4291         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4292         free_cpumask_var(tracing_cpumask_new);
4293
4294         return count;
4295
4296 err_unlock:
4297         free_cpumask_var(tracing_cpumask_new);
4298
4299         return err;
4300 }
4301
4302 static const struct file_operations tracing_cpumask_fops = {
4303         .open           = tracing_open_generic_tr,
4304         .read           = tracing_cpumask_read,
4305         .write          = tracing_cpumask_write,
4306         .release        = tracing_release_generic_tr,
4307         .llseek         = generic_file_llseek,
4308 };
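/*
 * Example usage of the tracing_cpumask file backed by the ops above
 * (illustrative).  The value is a hex cpumask, so this limits tracing
 * to CPUs 0 and 1:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	3
 */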
4309
4310 static int tracing_trace_options_show(struct seq_file *m, void *v)
4311 {
4312         struct tracer_opt *trace_opts;
4313         struct trace_array *tr = m->private;
4314         u32 tracer_flags;
4315         int i;
4316
4317         mutex_lock(&trace_types_lock);
4318         tracer_flags = tr->current_trace->flags->val;
4319         trace_opts = tr->current_trace->flags->opts;
4320
4321         for (i = 0; trace_options[i]; i++) {
4322                 if (tr->trace_flags & (1 << i))
4323                         seq_printf(m, "%s\n", trace_options[i]);
4324                 else
4325                         seq_printf(m, "no%s\n", trace_options[i]);
4326         }
4327
4328         for (i = 0; trace_opts[i].name; i++) {
4329                 if (tracer_flags & trace_opts[i].bit)
4330                         seq_printf(m, "%s\n", trace_opts[i].name);
4331                 else
4332                         seq_printf(m, "no%s\n", trace_opts[i].name);
4333         }
4334         mutex_unlock(&trace_types_lock);
4335
4336         return 0;
4337 }
4338
4339 static int __set_tracer_option(struct trace_array *tr,
4340                                struct tracer_flags *tracer_flags,
4341                                struct tracer_opt *opts, int neg)
4342 {
4343         struct tracer *trace = tracer_flags->trace;
4344         int ret;
4345
4346         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4347         if (ret)
4348                 return ret;
4349
4350         if (neg)
4351                 tracer_flags->val &= ~opts->bit;
4352         else
4353                 tracer_flags->val |= opts->bit;
4354         return 0;
4355 }
4356
4357 /* Try to assign a tracer specific option */
4358 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4359 {
4360         struct tracer *trace = tr->current_trace;
4361         struct tracer_flags *tracer_flags = trace->flags;
4362         struct tracer_opt *opts = NULL;
4363         int i;
4364
4365         for (i = 0; tracer_flags->opts[i].name; i++) {
4366                 opts = &tracer_flags->opts[i];
4367
4368                 if (strcmp(cmp, opts->name) == 0)
4369                         return __set_tracer_option(tr, trace->flags, opts, neg);
4370         }
4371
4372         return -EINVAL;
4373 }
4374
4375 /* Some tracers require overwrite to stay enabled */
4376 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4377 {
4378         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4379                 return -1;
4380
4381         return 0;
4382 }
4383
4384 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4385 {
4386         int *map;
4387
4388         if ((mask == TRACE_ITER_RECORD_TGID) ||
4389             (mask == TRACE_ITER_RECORD_CMD))
4390                 lockdep_assert_held(&event_mutex);
4391
4392         /* do nothing if flag is already set */
4393         if (!!(tr->trace_flags & mask) == !!enabled)
4394                 return 0;
4395
4396         /* Give the tracer a chance to approve the change */
4397         if (tr->current_trace->flag_changed)
4398                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4399                         return -EINVAL;
4400
4401         if (enabled)
4402                 tr->trace_flags |= mask;
4403         else
4404                 tr->trace_flags &= ~mask;
4405
4406         if (mask == TRACE_ITER_RECORD_CMD)
4407                 trace_event_enable_cmd_record(enabled);
4408
4409         if (mask == TRACE_ITER_RECORD_TGID) {
4410                 if (!tgid_map) {
4411                         tgid_map_max = pid_max;
4412                         map = kzalloc((tgid_map_max + 1) * sizeof(*tgid_map),
4413                                       GFP_KERNEL);
4414
4415                         /*
4416                          * Pairs with smp_load_acquire() in
4417                          * trace_find_tgid_ptr() to ensure that if it observes
4418                          * the tgid_map we just allocated then it also observes
4419                          * the corresponding tgid_map_max value.
4420                          */
4421                         smp_store_release(&tgid_map, map);
4422                 }
4423                 if (!tgid_map) {
4424                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4425                         return -ENOMEM;
4426                 }
4427
4428                 trace_event_enable_tgid_record(enabled);
4429         }
4430
4431         if (mask == TRACE_ITER_EVENT_FORK)
4432                 trace_event_follow_fork(tr, enabled);
4433
4434         if (mask == TRACE_ITER_FUNC_FORK)
4435                 ftrace_pid_follow_fork(tr, enabled);
4436
4437         if (mask == TRACE_ITER_OVERWRITE) {
4438                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4439 #ifdef CONFIG_TRACER_MAX_TRACE
4440                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4441 #endif
4442         }
4443
4444         if (mask == TRACE_ITER_PRINTK) {
4445                 trace_printk_start_stop_comm(enabled);
4446                 trace_printk_control(enabled);
4447         }
4448
4449         return 0;
4450 }
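/*
 * Example (illustrative): enabling TGID recording with
 *
 *	# echo 1 > /sys/kernel/tracing/options/record-tgid
 *
 * ends up here with mask == TRACE_ITER_RECORD_TGID and allocates the
 * pid -> tgid map on first use.
 */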
4451
4452 static int trace_set_options(struct trace_array *tr, char *option)
4453 {
4454         char *cmp;
4455         int neg = 0;
4456         int ret = -ENODEV;
4457         int i;
4458         size_t orig_len = strlen(option);
4459
4460         cmp = strstrip(option);
4461
4462         if (strncmp(cmp, "no", 2) == 0) {
4463                 neg = 1;
4464                 cmp += 2;
4465         }
4466
4467         mutex_lock(&event_mutex);
4468         mutex_lock(&trace_types_lock);
4469
4470         for (i = 0; trace_options[i]; i++) {
4471                 if (strcmp(cmp, trace_options[i]) == 0) {
4472                         ret = set_tracer_flag(tr, 1 << i, !neg);
4473                         break;
4474                 }
4475         }
4476
4477         /* If no option could be set, test the specific tracer options */
4478         if (!trace_options[i])
4479                 ret = set_tracer_option(tr, cmp, neg);
4480
4481         mutex_unlock(&trace_types_lock);
4482         mutex_unlock(&event_mutex);
4483
4484         /*
4485          * If the first trailing whitespace is replaced with '\0' by strstrip,
4486          * turn it back into a space.
4487          */
4488         if (orig_len > strlen(option))
4489                 option[strlen(option)] = ' ';
4490
4491         return ret;
4492 }
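/*
 * The same parsing backs writes to the trace_options file, e.g.
 * (illustrative):
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo nosym-offset > /sys/kernel/tracing/trace_options
 *
 * A leading "no" clears the flag; any name not found in trace_options[]
 * is handed to the current tracer's own options.
 */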
4493
4494 static void __init apply_trace_boot_options(void)
4495 {
4496         char *buf = trace_boot_options_buf;
4497         char *option;
4498
4499         while (true) {
4500                 option = strsep(&buf, ",");
4501
4502                 if (!option)
4503                         break;
4504
4505                 if (*option)
4506                         trace_set_options(&global_trace, option);
4507
4508                 /* Put back the comma to allow this to be called again */
4509                 if (buf)
4510                         *(buf - 1) = ',';
4511         }
4512 }
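/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command line parameter (handled earlier in this file), e.g.
 * (illustrative):
 *
 *	trace_options=sym-offset,noirq-info
 *
 * Each comma separated token is applied just as if it had been written
 * to the trace_options file.
 */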
4513
4514 static ssize_t
4515 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4516                         size_t cnt, loff_t *ppos)
4517 {
4518         struct seq_file *m = filp->private_data;
4519         struct trace_array *tr = m->private;
4520         char buf[64];
4521         int ret;
4522
4523         if (cnt >= sizeof(buf))
4524                 return -EINVAL;
4525
4526         if (copy_from_user(buf, ubuf, cnt))
4527                 return -EFAULT;
4528
4529         buf[cnt] = 0;
4530
4531         ret = trace_set_options(tr, buf);
4532         if (ret < 0)
4533                 return ret;
4534
4535         *ppos += cnt;
4536
4537         return cnt;
4538 }
4539
4540 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4541 {
4542         struct trace_array *tr = inode->i_private;
4543         int ret;
4544
4545         if (tracing_disabled)
4546                 return -ENODEV;
4547
4548         if (trace_array_get(tr) < 0)
4549                 return -ENODEV;
4550
4551         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4552         if (ret < 0)
4553                 trace_array_put(tr);
4554
4555         return ret;
4556 }
4557
4558 static const struct file_operations tracing_iter_fops = {
4559         .open           = tracing_trace_options_open,
4560         .read           = seq_read,
4561         .llseek         = seq_lseek,
4562         .release        = tracing_single_release_tr,
4563         .write          = tracing_trace_options_write,
4564 };
4565
4566 static const char readme_msg[] =
4567         "tracing mini-HOWTO:\n\n"
4568         "# echo 0 > tracing_on : quick way to disable tracing\n"
4569         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4570         " Important files:\n"
4571         "  trace\t\t\t- The static contents of the buffer\n"
4572         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4573         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4574         "  current_tracer\t- function and latency tracers\n"
4575         "  available_tracers\t- list of configured tracers for current_tracer\n"
4576         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4577         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4578         "  trace_clock\t\t- change the clock used to order events\n"
4579         "       local:   Per cpu clock but may not be synced across CPUs\n"
4580         "      global:   Synced across CPUs but slows tracing down.\n"
4581         "     counter:   Not a clock, but just an increment\n"
4582         "      uptime:   Jiffy counter from time of boot\n"
4583         "        perf:   Same clock that perf events use\n"
4584 #ifdef CONFIG_X86_64
4585         "     x86-tsc:   TSC cycle counter\n"
4586 #endif
4587         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4588         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4591         "\t\t\t  Remove sub-buffer with rmdir\n"
4592         "  trace_options\t\t- Set format or modify how tracing happens\n"
4593         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4594         "\t\t\t  option name\n"
4595         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597         "\n  available_filter_functions - list of functions that can be filtered on\n"
4598         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4599         "\t\t\t  functions\n"
4600         "\t     accepts: func_full_name or glob-matching-pattern\n"
4601         "\t     modules: Can select a group via module\n"
4602         "\t      Format: :mod:<module-name>\n"
4603         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4604         "\t    triggers: a command to perform when function is hit\n"
4605         "\t      Format: <function>:<trigger>[:count]\n"
4606         "\t     trigger: traceon, traceoff\n"
4607         "\t\t      enable_event:<system>:<event>\n"
4608         "\t\t      disable_event:<system>:<event>\n"
4609 #ifdef CONFIG_STACKTRACE
4610         "\t\t      stacktrace\n"
4611 #endif
4612 #ifdef CONFIG_TRACER_SNAPSHOT
4613         "\t\t      snapshot\n"
4614 #endif
4615         "\t\t      dump\n"
4616         "\t\t      cpudump\n"
4617         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4618         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4619         "\t     The first one will disable tracing every time do_fault is hit\n"
4620         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4621         "\t       The first time do_trap is hit and it disables tracing, the\n"
4622         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4623         "\t       the counter will not decrement. It only decrements when the\n"
4624         "\t       trigger did work\n"
4625         "\t     To remove a trigger without a count:\n"
4626         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4627         "\t     To remove a trigger with a count:\n"
4628         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4629         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4630         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4631         "\t    modules: Can select a group via module command :mod:\n"
4632         "\t    Does not accept triggers\n"
4633 #endif /* CONFIG_DYNAMIC_FTRACE */
4634 #ifdef CONFIG_FUNCTION_TRACER
4635         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4636         "\t\t    (function)\n"
4637 #endif
4638 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4639         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4640         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4641         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4642 #endif
4643 #ifdef CONFIG_TRACER_SNAPSHOT
4644         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4645         "\t\t\t  snapshot buffer. Read the contents for more\n"
4646         "\t\t\t  information\n"
4647 #endif
4648 #ifdef CONFIG_STACK_TRACER
4649         "  stack_trace\t\t- Shows the max stack trace when active\n"
4650         "  stack_max_size\t- Shows current max stack size that was traced\n"
4651         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4652         "\t\t\t  new trace)\n"
4653 #ifdef CONFIG_DYNAMIC_FTRACE
4654         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4655         "\t\t\t  traces\n"
4656 #endif
4657 #endif /* CONFIG_STACK_TRACER */
4658 #ifdef CONFIG_KPROBE_EVENTS
4659         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4660         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4661 #endif
4662 #ifdef CONFIG_UPROBE_EVENTS
4663         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4664         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4665 #endif
4666 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4667         "\t  accepts: event-definitions (one definition per line)\n"
4668         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4669         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4670         "\t           -:[<group>/]<event>\n"
4671 #ifdef CONFIG_KPROBE_EVENTS
4672         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4673         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4674 #endif
4675 #ifdef CONFIG_UPROBE_EVENTS
4676         "\t    place: <path>:<offset>\n"
4677 #endif
4678         "\t     args: <name>=fetcharg[:type]\n"
4679         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4680         "\t           $stack<index>, $stack, $retval, $comm\n"
4681         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4682         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4683 #endif
4684         "  events/\t\t- Directory containing all trace event subsystems:\n"
4685         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4686         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4687         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4688         "\t\t\t  events\n"
4689         "      filter\t\t- If set, only events passing filter are traced\n"
4690         "  events/<system>/<event>/\t- Directory containing control files for\n"
4691         "\t\t\t  <event>:\n"
4692         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4693         "      filter\t\t- If set, only events passing filter are traced\n"
4694         "      trigger\t\t- If set, a command to perform when event is hit\n"
4695         "\t    Format: <trigger>[:count][if <filter>]\n"
4696         "\t   trigger: traceon, traceoff\n"
4697         "\t            enable_event:<system>:<event>\n"
4698         "\t            disable_event:<system>:<event>\n"
4699 #ifdef CONFIG_HIST_TRIGGERS
4700         "\t            enable_hist:<system>:<event>\n"
4701         "\t            disable_hist:<system>:<event>\n"
4702 #endif
4703 #ifdef CONFIG_STACKTRACE
4704         "\t\t    stacktrace\n"
4705 #endif
4706 #ifdef CONFIG_TRACER_SNAPSHOT
4707         "\t\t    snapshot\n"
4708 #endif
4709 #ifdef CONFIG_HIST_TRIGGERS
4710         "\t\t    hist (see below)\n"
4711 #endif
4712         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4713         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4714         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4715         "\t                  events/block/block_unplug/trigger\n"
4716         "\t   The first disables tracing every time block_unplug is hit.\n"
4717         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4718         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4719         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4720         "\t   Like function triggers, the counter is only decremented if it\n"
4721         "\t    enabled or disabled tracing.\n"
4722         "\t   To remove a trigger without a count:\n"
4723         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4724         "\t   To remove a trigger with a count:\n"
4725         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4726         "\t   Filters can be ignored when removing a trigger.\n"
4727 #ifdef CONFIG_HIST_TRIGGERS
4728         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4729         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4730         "\t            [:values=<field1[,field2,...]>]\n"
4731         "\t            [:sort=<field1[,field2,...]>]\n"
4732         "\t            [:size=#entries]\n"
4733         "\t            [:pause][:continue][:clear]\n"
4734         "\t            [:name=histname1]\n"
4735         "\t            [if <filter>]\n\n"
4736         "\t    When a matching event is hit, an entry is added to a hash\n"
4737         "\t    table using the key(s) and value(s) named, and the value of a\n"
4738         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4739         "\t    correspond to fields in the event's format description.  Keys\n"
4740         "\t    can be any field, or the special string 'stacktrace'.\n"
4741         "\t    Compound keys consisting of up to two fields can be specified\n"
4742         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4743         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4744         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4745         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4746         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4747         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4748         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4749         "\t    its histogram data will be shared with other triggers of the\n"
4750         "\t    same name, and trigger hits will update this common data.\n\n"
4751         "\t    Reading the 'hist' file for the event will dump the hash\n"
4752         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4753         "\t    triggers attached to an event, there will be a table for each\n"
4754         "\t    trigger in the output.  The table displayed for a named\n"
4755         "\t    trigger will be the same as any other instance having the\n"
4756         "\t    same name.  The default format used to display a given field\n"
4757         "\t    can be modified by appending any of the following modifiers\n"
4758         "\t    to the field name, as applicable:\n\n"
4759         "\t            .hex        display a number as a hex value\n"
4760         "\t            .sym        display an address as a symbol\n"
4761         "\t            .sym-offset display an address as a symbol and offset\n"
4762         "\t            .execname   display a common_pid as a program name\n"
4763         "\t            .syscall    display a syscall id as a syscall name\n"
4764         "\t            .log2       display log2 value rather than raw number\n\n"
4765         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4766         "\t    trigger or to start a hist trigger but not log any events\n"
4767         "\t    until told to do so.  'continue' can be used to start or\n"
4768         "\t    restart a paused hist trigger.\n\n"
4769         "\t    The 'clear' parameter will clear the contents of a running\n"
4770         "\t    hist trigger and leave its current paused/active state\n"
4771         "\t    unchanged.\n\n"
4772         "\t    The enable_hist and disable_hist triggers can be used to\n"
4773         "\t    have one event conditionally start and stop another event's\n"
4774         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4775         "\t    the enable_event and disable_event triggers.\n"
4776 #endif
4777 ;
4778
4779 static ssize_t
4780 tracing_readme_read(struct file *filp, char __user *ubuf,
4781                        size_t cnt, loff_t *ppos)
4782 {
4783         return simple_read_from_buffer(ubuf, cnt, ppos,
4784                                         readme_msg, strlen(readme_msg));
4785 }
4786
4787 static const struct file_operations tracing_readme_fops = {
4788         .open           = tracing_open_generic,
4789         .read           = tracing_readme_read,
4790         .llseek         = generic_file_llseek,
4791 };
4792
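     /*
      * Seq_file interface behind the saved PID-to-TGID map (presumably
      * exposed as a "saved_tgids" tracefs file).  Each position indexes
      * tgid_map by PID via trace_find_tgid_ptr(), and saved_tgids_show()
      * below emits one "<pid> <tgid>" line per recorded task, skipping
      * empty slots.
      */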
4793 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4794 {
4795         int pid = ++(*pos);
4796
4797         return trace_find_tgid_ptr(pid);
4798 }
4799
4800 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4801 {
4802         int pid = *pos;
4803
4804         return trace_find_tgid_ptr(pid);
4805 }
4806
4807 static void saved_tgids_stop(struct seq_file *m, void *v)
4808 {
4809 }
4810
4811 static int saved_tgids_show(struct seq_file *m, void *v)
4812 {
4813         int *entry = (int *)v;
4814         int pid = entry - tgid_map;
4815         int tgid = *entry;
4816
4817         if (tgid == 0)
4818                 return SEQ_SKIP;
4819
4820         seq_printf(m, "%d %d\n", pid, tgid);
4821         return 0;
4822 }
4823
4824 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4825         .start          = saved_tgids_start,
4826         .stop           = saved_tgids_stop,
4827         .next           = saved_tgids_next,
4828         .show           = saved_tgids_show,
4829 };
4830
4831 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4832 {
4833         if (tracing_disabled)
4834                 return -ENODEV;
4835
4836         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4837 }
4838
4839
4840 static const struct file_operations tracing_saved_tgids_fops = {
4841         .open           = tracing_saved_tgids_open,
4842         .read           = seq_read,
4843         .llseek         = seq_lseek,
4844         .release        = seq_release,
4845 };
4846
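     /*
      * Seq_file interface for the cached task comms (presumably the
      * "saved_cmdlines" tracefs file).  The iterator walks
      * savedcmd->map_cmdline_to_pid[] under trace_cmdline_lock (taken
      * with preemption disabled in saved_cmdlines_start()), skipping
      * unused slots, and saved_cmdlines_show() prints one "<pid> <comm>"
      * line per cached command name.
      */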
4847 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4848 {
4849         unsigned int *ptr = v;
4850
4851         if (*pos || m->count)
4852                 ptr++;
4853
4854         (*pos)++;
4855
4856         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4857              ptr++) {
4858                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4859                         continue;
4860
4861                 return ptr;
4862         }
4863
4864         return NULL;
4865 }
4866
4867 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4868 {
4869         void *v;
4870         loff_t l = 0;
4871
4872         preempt_disable();
4873         arch_spin_lock(&trace_cmdline_lock);
4874
4875         v = &savedcmd->map_cmdline_to_pid[0];
4876         while (l <= *pos) {
4877                 v = saved_cmdlines_next(m, v, &l);
4878                 if (!v)
4879                         return NULL;
4880         }
4881
4882         return v;
4883 }
4884
4885 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4886 {
4887         arch_spin_unlock(&trace_cmdline_lock);
4888         preempt_enable();
4889 }
4890
4891 static int saved_cmdlines_show(struct seq_file *m, void *v)
4892 {
4893         char buf[TASK_COMM_LEN];
4894         unsigned int *pid = v;
4895
4896         __trace_find_cmdline(*pid, buf);
4897         seq_printf(m, "%d %s\n", *pid, buf);
4898         return 0;
4899 }
4900
4901 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4902         .start          = saved_cmdlines_start,
4903         .next           = saved_cmdlines_next,
4904         .stop           = saved_cmdlines_stop,
4905         .show           = saved_cmdlines_show,
4906 };
4907
4908 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4909 {
4910         if (tracing_disabled)
4911                 return -ENODEV;
4912
4913         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4914 }
4915
4916 static const struct file_operations tracing_saved_cmdlines_fops = {
4917         .open           = tracing_saved_cmdlines_open,
4918         .read           = seq_read,
4919         .llseek         = seq_lseek,
4920         .release        = seq_release,
4921 };
4922
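     /*
      * Control file for the cmdline cache size (presumably
      * "saved_cmdlines_size" in tracefs): reading reports how many
      * cmdline slots are allocated, writing a new count reallocates the
      * cache via tracing_resize_saved_cmdlines().  A plausible usage
      * example, assuming the usual tracefs mount point:
      *
      *      # echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
      */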
4923 static ssize_t
4924 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4925                                  size_t cnt, loff_t *ppos)
4926 {
4927         char buf[64];
4928         int r;
4929
4930         arch_spin_lock(&trace_cmdline_lock);
4931         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4932         arch_spin_unlock(&trace_cmdline_lock);
4933
4934         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4935 }
4936
4937 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4938 {
4939         kfree(s->saved_cmdlines);
4940         kfree(s->map_cmdline_to_pid);
4941         kfree(s);
4942 }
4943
4944 static int tracing_resize_saved_cmdlines(unsigned int val)
4945 {
4946         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4947
4948         s = kmalloc(sizeof(*s), GFP_KERNEL);
4949         if (!s)
4950                 return -ENOMEM;
4951
4952         if (allocate_cmdlines_buffer(val, s) < 0) {
4953                 kfree(s);
4954                 return -ENOMEM;
4955         }
4956
4957         arch_spin_lock(&trace_cmdline_lock);
4958         savedcmd_temp = savedcmd;
4959         savedcmd = s;
4960         arch_spin_unlock(&trace_cmdline_lock);
4961         free_saved_cmdlines_buffer(savedcmd_temp);
4962
4963         return 0;
4964 }
4965
4966 static ssize_t
4967 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4968                                   size_t cnt, loff_t *ppos)
4969 {
4970         unsigned long val;
4971         int ret;
4972
4973         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4974         if (ret)
4975                 return ret;
4976
4977         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4978         if (!val || val > PID_MAX_DEFAULT)
4979                 return -EINVAL;
4980
4981         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4982         if (ret < 0)
4983                 return ret;
4984
4985         *ppos += cnt;
4986
4987         return cnt;
4988 }
4989
4990 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4991         .open           = tracing_open_generic,
4992         .read           = tracing_saved_cmdlines_size_read,
4993         .write          = tracing_saved_cmdlines_size_write,
4994 };
4995
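     /*
      * When CONFIG_TRACE_EVAL_MAP_FILE is set, the "eval_map" file
      * (created by trace_create_eval_file() below) exposes the eval
      * maps registered by trace events, i.e. the enum/sizeof name to
      * value translations.  eval_map_show() prints each entry as
      * "<name> <value> (<system>)".
      */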
4996 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4997 static union trace_eval_map_item *
4998 update_eval_map(union trace_eval_map_item *ptr)
4999 {
5000         if (!ptr->map.eval_string) {
5001                 if (ptr->tail.next) {
5002                         ptr = ptr->tail.next;
5003                         /* Set ptr to the next real item (skip head) */
5004                         ptr++;
5005                 } else
5006                         return NULL;
5007         }
5008         return ptr;
5009 }
5010
5011 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5012 {
5013         union trace_eval_map_item *ptr = v;
5014
5015         /*
5016          * Paranoid! If ptr points to end, we don't want to increment past it.
5017          * This really should never happen.
5018          */
5019         ptr = update_eval_map(ptr);
5020         if (WARN_ON_ONCE(!ptr))
5021                 return NULL;
5022
5023         ptr++;
5024
5025         (*pos)++;
5026
5027         ptr = update_eval_map(ptr);
5028
5029         return ptr;
5030 }
5031
5032 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5033 {
5034         union trace_eval_map_item *v;
5035         loff_t l = 0;
5036
5037         mutex_lock(&trace_eval_mutex);
5038
5039         v = trace_eval_maps;
5040         if (v)
5041                 v++;
5042
5043         while (v && l < *pos) {
5044                 v = eval_map_next(m, v, &l);
5045         }
5046
5047         return v;
5048 }
5049
5050 static void eval_map_stop(struct seq_file *m, void *v)
5051 {
5052         mutex_unlock(&trace_eval_mutex);
5053 }
5054
5055 static int eval_map_show(struct seq_file *m, void *v)
5056 {
5057         union trace_eval_map_item *ptr = v;
5058
5059         seq_printf(m, "%s %ld (%s)\n",
5060                    ptr->map.eval_string, ptr->map.eval_value,
5061                    ptr->map.system);
5062
5063         return 0;
5064 }
5065
5066 static const struct seq_operations tracing_eval_map_seq_ops = {
5067         .start          = eval_map_start,
5068         .next           = eval_map_next,
5069         .stop           = eval_map_stop,
5070         .show           = eval_map_show,
5071 };
5072
5073 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5074 {
5075         if (tracing_disabled)
5076                 return -ENODEV;
5077
5078         return seq_open(filp, &tracing_eval_map_seq_ops);
5079 }
5080
5081 static const struct file_operations tracing_eval_map_fops = {
5082         .open           = tracing_eval_map_open,
5083         .read           = seq_read,
5084         .llseek         = seq_lseek,
5085         .release        = seq_release,
5086 };
5087
5088 static inline union trace_eval_map_item *
5089 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5090 {
5091         /* Return tail of array given the head */
5092         return ptr + ptr->head.length + 1;
5093 }
5094
5095 static void
5096 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5097                            int len)
5098 {
5099         struct trace_eval_map **stop;
5100         struct trace_eval_map **map;
5101         union trace_eval_map_item *map_array;
5102         union trace_eval_map_item *ptr;
5103
5104         stop = start + len;
5105
5106         /*
5107          * The trace_eval_maps contains the map plus a head and tail item,
5108          * where the head holds the module and the length of the array, and the
5109          * tail holds a pointer to the next list.
5110          */
5111         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5112         if (!map_array) {
5113                 pr_warn("Unable to allocate trace eval mapping\n");
5114                 return;
5115         }
5116
5117         mutex_lock(&trace_eval_mutex);
5118
5119         if (!trace_eval_maps)
5120                 trace_eval_maps = map_array;
5121         else {
5122                 ptr = trace_eval_maps;
5123                 for (;;) {
5124                         ptr = trace_eval_jmp_to_tail(ptr);
5125                         if (!ptr->tail.next)
5126                                 break;
5127                         ptr = ptr->tail.next;
5128
5129                 }
5130                 ptr->tail.next = map_array;
5131         }
5132         map_array->head.mod = mod;
5133         map_array->head.length = len;
5134         map_array++;
5135
5136         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5137                 map_array->map = **map;
5138                 map_array++;
5139         }
5140         memset(map_array, 0, sizeof(*map_array));
5141
5142         mutex_unlock(&trace_eval_mutex);
5143 }
5144
5145 static void trace_create_eval_file(struct dentry *d_tracer)
5146 {
5147         trace_create_file("eval_map", 0444, d_tracer,
5148                           NULL, &tracing_eval_map_fops);
5149 }
5150
5151 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5152 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5153 static inline void trace_insert_eval_map_file(struct module *mod,
5154                               struct trace_eval_map **start, int len) { }
5155 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5156
5157 static void trace_insert_eval_map(struct module *mod,
5158                                   struct trace_eval_map **start, int len)
5159 {
5160         struct trace_eval_map **map;
5161
5162         if (len <= 0)
5163                 return;
5164
5165         map = start;
5166
5167         trace_event_eval_update(map, len);
5168
5169         trace_insert_eval_map_file(mod, start, len);
5170 }
5171
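     /*
      * Read side of the tracer-selection file (presumably
      * "current_tracer" in tracefs): reports the name of the tracer
      * currently attached to this trace array, e.g. "nop".
      */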
5172 static ssize_t
5173 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5174                        size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_array *tr = filp->private_data;
5177         char buf[MAX_TRACER_SIZE+2];
5178         int r;
5179
5180         mutex_lock(&trace_types_lock);
5181         r = sprintf(buf, "%s\n", tr->current_trace->name);
5182         mutex_unlock(&trace_types_lock);
5183
5184         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5185 }
5186
5187 int tracer_init(struct tracer *t, struct trace_array *tr)
5188 {
5189         tracing_reset_online_cpus(&tr->trace_buffer);
5190         return t->init(tr);
5191 }
5192
5193 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5194 {
5195         int cpu;
5196
5197         for_each_tracing_cpu(cpu)
5198                 per_cpu_ptr(buf->data, cpu)->entries = val;
5199 }
5200
5201 #ifdef CONFIG_TRACER_MAX_TRACE
5202 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5203 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5204                                         struct trace_buffer *size_buf, int cpu_id)
5205 {
5206         int cpu, ret = 0;
5207
5208         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5209                 for_each_tracing_cpu(cpu) {
5210                         ret = ring_buffer_resize(trace_buf->buffer,
5211                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5212                         if (ret < 0)
5213                                 break;
5214                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5215                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5216                 }
5217         } else {
5218                 ret = ring_buffer_resize(trace_buf->buffer,
5219                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5220                 if (ret == 0)
5221                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5222                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5223         }
5224
5225         return ret;
5226 }
5227 #endif /* CONFIG_TRACER_MAX_TRACE */
5228
5229 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5230                                         unsigned long size, int cpu)
5231 {
5232         int ret;
5233
5234         /*
5235          * If the kernel or user changes the size of the ring buffer,
5236          * we use the size that was given, and we can forget about
5237          * expanding it later.
5238          */
5239         ring_buffer_expanded = true;
5240
5241         /* May be called before buffers are initialized */
5242         if (!tr->trace_buffer.buffer)
5243                 return 0;
5244
5245         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5246         if (ret < 0)
5247                 return ret;
5248
5249 #ifdef CONFIG_TRACER_MAX_TRACE
5250         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5251             !tr->current_trace->use_max_tr)
5252                 goto out;
5253
5254         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5255         if (ret < 0) {
5256                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5257                                                      &tr->trace_buffer, cpu);
5258                 if (r < 0) {
5259                         /*
5260                          * AARGH! We are left with different
5261                          * size max buffer!!!!
5262                          * The max buffer is our "snapshot" buffer.
5263                          * When a tracer needs a snapshot (one of the
5264                          * latency tracers), it swaps the max buffer
5265                          * with the saved snapshot. We succeeded in
5266                          * updating the size of the main buffer, but failed to
5267                          * update the size of the max buffer. But when we tried
5268                          * to reset the main buffer to the original size, we
5269                          * failed there too. This is very unlikely to
5270                          * happen, but if it does, warn and kill all
5271                          * tracing.
5272                          */
5273                         WARN_ON(1);
5274                         tracing_disabled = 1;
5275                 }
5276                 return ret;
5277         }
5278
5279         if (cpu == RING_BUFFER_ALL_CPUS)
5280                 set_buffer_entries(&tr->max_buffer, size);
5281         else
5282                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5283
5284  out:
5285 #endif /* CONFIG_TRACER_MAX_TRACE */
5286
5287         if (cpu == RING_BUFFER_ALL_CPUS)
5288                 set_buffer_entries(&tr->trace_buffer, size);
5289         else
5290                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5291
5292         return ret;
5293 }
5294
5295 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5296                                           unsigned long size, int cpu_id)
5297 {
5298         int ret = size;
5299
5300         mutex_lock(&trace_types_lock);
5301
5302         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5303                 /* make sure this cpu is enabled in the mask */
5304                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5305                         ret = -EINVAL;
5306                         goto out;
5307                 }
5308         }
5309
5310         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5311         if (ret < 0)
5312                 ret = -ENOMEM;
5313
5314 out:
5315         mutex_unlock(&trace_types_lock);
5316
5317         return ret;
5318 }
5319
5320
5321 /**
5322  * tracing_update_buffers - used by tracing facility to expand ring buffers
5323  *
5324  * To save memory when tracing is never used on a system that has it
5325  * configured in, the ring buffers are set to a minimum size. Once a
5326  * user starts to use the tracing facility, they need to grow to
5327  * their default size.
5328  *
5329  * This function is to be called when a tracer is about to be used.
5330  */
5331 int tracing_update_buffers(void)
5332 {
5333         int ret = 0;
5334
5335         mutex_lock(&trace_types_lock);
5336         if (!ring_buffer_expanded)
5337                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5338                                                 RING_BUFFER_ALL_CPUS);
5339         mutex_unlock(&trace_types_lock);
5340
5341         return ret;
5342 }
5343
5344 struct trace_option_dentry;
5345
5346 static void
5347 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5348
5349 /*
5350  * Used to clear out the tracer before deletion of an instance.
5351  * Must have trace_types_lock held.
5352  */
5353 static void tracing_set_nop(struct trace_array *tr)
5354 {
5355         if (tr->current_trace == &nop_trace)
5356                 return;
5357
5358         tr->current_trace->enabled--;
5359
5360         if (tr->current_trace->reset)
5361                 tr->current_trace->reset(tr);
5362
5363         tr->current_trace = &nop_trace;
5364 }
5365
5366 static bool tracer_options_updated;
5367
5368 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5369 {
5370         /* Only enable if the directory has been created already. */
5371         if (!tr->dir)
5372                 return;
5373
5374         /* Only create trace option files after update_tracer_options finish */
5375         if (!tracer_options_updated)
5376                 return;
5377
5378         create_trace_option_files(tr, t);
5379 }
5380
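     /*
      * Switch @tr to the tracer named @buf.  Roughly: expand the ring
      * buffer if it is still at its boot-time minimum, look the name up
      * in trace_types, tear down the current tracer (disable branch
      * tracing, reset it, point current_trace at nop_trace), free or
      * allocate the max/snapshot buffer as the new tracer requires, then
      * init and enable the new tracer.  Returns -EBUSY while trace_pipe
      * readers still hold a reference on the current tracer.
      */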
5381 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5382 {
5383         struct tracer *t;
5384 #ifdef CONFIG_TRACER_MAX_TRACE
5385         bool had_max_tr;
5386 #endif
5387         int ret = 0;
5388
5389         mutex_lock(&trace_types_lock);
5390
5391         if (!ring_buffer_expanded) {
5392                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5393                                                 RING_BUFFER_ALL_CPUS);
5394                 if (ret < 0)
5395                         goto out;
5396                 ret = 0;
5397         }
5398
5399         for (t = trace_types; t; t = t->next) {
5400                 if (strcmp(t->name, buf) == 0)
5401                         break;
5402         }
5403         if (!t) {
5404                 ret = -EINVAL;
5405                 goto out;
5406         }
5407         if (t == tr->current_trace)
5408                 goto out;
5409
5410         /* Some tracers won't work on the kernel command line */
5411         if (system_state < SYSTEM_RUNNING && t->noboot) {
5412                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5413                         t->name);
5414                 goto out;
5415         }
5416
5417         /* Some tracers are only allowed for the top level buffer */
5418         if (!trace_ok_for_array(t, tr)) {
5419                 ret = -EINVAL;
5420                 goto out;
5421         }
5422
5423         /* If trace pipe files are being read, we can't change the tracer */
5424         if (tr->current_trace->ref) {
5425                 ret = -EBUSY;
5426                 goto out;
5427         }
5428
5429         trace_branch_disable();
5430
5431         tr->current_trace->enabled--;
5432
5433         if (tr->current_trace->reset)
5434                 tr->current_trace->reset(tr);
5435
5436         /* Current trace needs to be nop_trace before synchronize_sched */
5437         tr->current_trace = &nop_trace;
5438
5439 #ifdef CONFIG_TRACER_MAX_TRACE
5440         had_max_tr = tr->allocated_snapshot;
5441
5442         if (had_max_tr && !t->use_max_tr) {
5443                 /*
5444                  * We need to make sure that the update_max_tr sees that
5445                  * current_trace changed to nop_trace to keep it from
5446                  * swapping the buffers after we resize it.
5447                  * update_max_tr() is called with interrupts disabled,
5448                  * so a synchronize_sched() is sufficient.
5449                  */
5450                 synchronize_sched();
5451                 free_snapshot(tr);
5452         }
5453 #endif
5454
5455 #ifdef CONFIG_TRACER_MAX_TRACE
5456         if (t->use_max_tr && !had_max_tr) {
5457                 ret = tracing_alloc_snapshot_instance(tr);
5458                 if (ret < 0)
5459                         goto out;
5460         }
5461 #endif
5462
5463         if (t->init) {
5464                 ret = tracer_init(t, tr);
5465                 if (ret)
5466                         goto out;
5467         }
5468
5469         tr->current_trace = t;
5470         tr->current_trace->enabled++;
5471         trace_branch_enable(tr);
5472  out:
5473         mutex_unlock(&trace_types_lock);
5474
5475         return ret;
5476 }
5477
5478 static ssize_t
5479 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5480                         size_t cnt, loff_t *ppos)
5481 {
5482         struct trace_array *tr = filp->private_data;
5483         char buf[MAX_TRACER_SIZE+1];
5484         int i;
5485         size_t ret;
5486         int err;
5487
5488         ret = cnt;
5489
5490         if (cnt > MAX_TRACER_SIZE)
5491                 cnt = MAX_TRACER_SIZE;
5492
5493         if (copy_from_user(buf, ubuf, cnt))
5494                 return -EFAULT;
5495
5496         buf[cnt] = 0;
5497
5498         /* strip trailing whitespace. */
5499         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5500                 buf[i] = 0;
5501
5502         err = tracing_set_tracer(tr, buf);
5503         if (err)
5504                 return err;
5505
5506         *ppos += ret;
5507
5508         return ret;
5509 }
5510
5511 static ssize_t
5512 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5513                    size_t cnt, loff_t *ppos)
5514 {
5515         char buf[64];
5516         int r;
5517
5518         r = snprintf(buf, sizeof(buf), "%ld\n",
5519                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5520         if (r > sizeof(buf))
5521                 r = sizeof(buf);
5522         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5523 }
5524
5525 static ssize_t
5526 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5527                     size_t cnt, loff_t *ppos)
5528 {
5529         unsigned long val;
5530         int ret;
5531
5532         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5533         if (ret)
5534                 return ret;
5535
5536         *ptr = val * 1000;
5537
5538         return cnt;
5539 }
5540
5541 static ssize_t
5542 tracing_thresh_read(struct file *filp, char __user *ubuf,
5543                     size_t cnt, loff_t *ppos)
5544 {
5545         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5546 }
5547
5548 static ssize_t
5549 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5550                      size_t cnt, loff_t *ppos)
5551 {
5552         struct trace_array *tr = filp->private_data;
5553         int ret;
5554
5555         mutex_lock(&trace_types_lock);
5556         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5557         if (ret < 0)
5558                 goto out;
5559
5560         if (tr->current_trace->update_thresh) {
5561                 ret = tr->current_trace->update_thresh(tr);
5562                 if (ret < 0)
5563                         goto out;
5564         }
5565
5566         ret = cnt;
5567 out:
5568         mutex_unlock(&trace_types_lock);
5569
5570         return ret;
5571 }
5572
5573 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5574
5575 static ssize_t
5576 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5577                      size_t cnt, loff_t *ppos)
5578 {
5579         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5580 }
5581
5582 static ssize_t
5583 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5584                       size_t cnt, loff_t *ppos)
5585 {
5586         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5587 }
5588
5589 #endif
5590
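     /*
      * Open handler for the blocking, consuming reader (presumably the
      * "trace_pipe" file), e.g.:
      *
      *      # cat /sys/kernel/tracing/trace_pipe
      *
      * Each open allocates its own trace_iterator and bumps
      * current_trace->ref so the tracer cannot be switched while the
      * pipe is being read.
      */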
5591 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5592 {
5593         struct trace_array *tr = inode->i_private;
5594         struct trace_iterator *iter;
5595         int ret = 0;
5596
5597         if (tracing_disabled)
5598                 return -ENODEV;
5599
5600         if (trace_array_get(tr) < 0)
5601                 return -ENODEV;
5602
5603         mutex_lock(&trace_types_lock);
5604
5605         /* create a buffer to store the information to pass to userspace */
5606         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5607         if (!iter) {
5608                 ret = -ENOMEM;
5609                 __trace_array_put(tr);
5610                 goto out;
5611         }
5612
5613         trace_seq_init(&iter->seq);
5614         iter->trace = tr->current_trace;
5615
5616         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5617                 ret = -ENOMEM;
5618                 goto fail;
5619         }
5620
5621         /* trace pipe does not show start of buffer */
5622         cpumask_setall(iter->started);
5623
5624         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5625                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5626
5627         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5628         if (trace_clocks[tr->clock_id].in_ns)
5629                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5630
5631         iter->tr = tr;
5632         iter->trace_buffer = &tr->trace_buffer;
5633         iter->cpu_file = tracing_get_cpu(inode);
5634         mutex_init(&iter->mutex);
5635         filp->private_data = iter;
5636
5637         if (iter->trace->pipe_open)
5638                 iter->trace->pipe_open(iter);
5639
5640         nonseekable_open(inode, filp);
5641
5642         tr->current_trace->ref++;
5643 out:
5644         mutex_unlock(&trace_types_lock);
5645         return ret;
5646
5647 fail:
5648         kfree(iter);
5649         __trace_array_put(tr);
5650         mutex_unlock(&trace_types_lock);
5651         return ret;
5652 }
5653
5654 static int tracing_release_pipe(struct inode *inode, struct file *file)
5655 {
5656         struct trace_iterator *iter = file->private_data;
5657         struct trace_array *tr = inode->i_private;
5658
5659         mutex_lock(&trace_types_lock);
5660
5661         tr->current_trace->ref--;
5662
5663         if (iter->trace->pipe_close)
5664                 iter->trace->pipe_close(iter);
5665
5666         mutex_unlock(&trace_types_lock);
5667
5668         free_cpumask_var(iter->started);
5669         mutex_destroy(&iter->mutex);
5670         kfree(iter);
5671
5672         trace_array_put(tr);
5673
5674         return 0;
5675 }
5676
5677 static unsigned int
5678 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5679 {
5680         struct trace_array *tr = iter->tr;
5681
5682         /* Iterators are static; they are either filled or empty */
5683         if (trace_buffer_iter(iter, iter->cpu_file))
5684                 return POLLIN | POLLRDNORM;
5685
5686         if (tr->trace_flags & TRACE_ITER_BLOCK)
5687                 /*
5688                  * Always select as readable when in blocking mode
5689                  */
5690                 return POLLIN | POLLRDNORM;
5691         else
5692                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5693                                              filp, poll_table);
5694 }
5695
5696 static unsigned int
5697 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5698 {
5699         struct trace_iterator *iter = filp->private_data;
5700
5701         return trace_poll(iter, filp, poll_table);
5702 }
5703
5704 /* Must be called with iter->mutex held. */
5705 static int tracing_wait_pipe(struct file *filp)
5706 {
5707         struct trace_iterator *iter = filp->private_data;
5708         int ret;
5709
5710         while (trace_empty(iter)) {
5711
5712                 if ((filp->f_flags & O_NONBLOCK)) {
5713                         return -EAGAIN;
5714                 }
5715
5716                 /*
5717                  * We block until we have read something and tracing is then
5718                  * disabled. We still block if tracing is disabled but nothing
5719                  * has been read yet. This allows a user to cat this file, and
5720                  * then enable tracing. But after we have read something, we
5721                  * give an EOF when tracing is disabled again.
5722                  *
5723                  * iter->pos will be 0 if we haven't read anything.
5724                  */
5725                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5726                         break;
5727
5728                 mutex_unlock(&iter->mutex);
5729
5730                 ret = wait_on_pipe(iter, false);
5731
5732                 mutex_lock(&iter->mutex);
5733
5734                 if (ret)
5735                         return ret;
5736         }
5737
5738         return 1;
5739 }
5740
5741 /*
5742  * Consumer reader.
5743  */
5744 static ssize_t
5745 tracing_read_pipe(struct file *filp, char __user *ubuf,
5746                   size_t cnt, loff_t *ppos)
5747 {
5748         struct trace_iterator *iter = filp->private_data;
5749         ssize_t sret;
5750
5751         /*
5752          * Avoid more than one consumer on a single file descriptor.
5753          * This is just a matter of trace coherency; the ring buffer itself
5754          * is protected.
5755          */
5756         mutex_lock(&iter->mutex);
5757
5758         /* return any leftover data */
5759         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5760         if (sret != -EBUSY)
5761                 goto out;
5762
5763         trace_seq_init(&iter->seq);
5764
5765         if (iter->trace->read) {
5766                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5767                 if (sret)
5768                         goto out;
5769         }
5770
5771 waitagain:
5772         sret = tracing_wait_pipe(filp);
5773         if (sret <= 0)
5774                 goto out;
5775
5776         /* stop when tracing is finished */
5777         if (trace_empty(iter)) {
5778                 sret = 0;
5779                 goto out;
5780         }
5781
5782         if (cnt >= PAGE_SIZE)
5783                 cnt = PAGE_SIZE - 1;
5784
5785         /* reset all but tr, trace, and overruns */
5786         memset(&iter->seq, 0,
5787                sizeof(struct trace_iterator) -
5788                offsetof(struct trace_iterator, seq));
5789         cpumask_clear(iter->started);
5790         trace_seq_init(&iter->seq);
5791         iter->pos = -1;
5792
5793         trace_event_read_lock();
5794         trace_access_lock(iter->cpu_file);
5795         while (trace_find_next_entry_inc(iter) != NULL) {
5796                 enum print_line_t ret;
5797                 int save_len = iter->seq.seq.len;
5798
5799                 ret = print_trace_line(iter);
5800                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5801                         /*
5802                          * If one print_trace_line() fills the entire trace_seq in one shot,
5803                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
5804                          * In this case, we need to consume it, otherwise the loop will peek
5805                          * this event next time, resulting in an infinite loop.
5806                          */
5807                         if (save_len == 0) {
5808                                 iter->seq.full = 0;
5809                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
5810                                 trace_consume(iter);
5811                                 break;
5812                         }
5813
5814                         /* In other cases, don't print partial lines */
5815                         iter->seq.seq.len = save_len;
5816                         break;
5817                 }
5818                 if (ret != TRACE_TYPE_NO_CONSUME)
5819                         trace_consume(iter);
5820
5821                 if (trace_seq_used(&iter->seq) >= cnt)
5822                         break;
5823
5824                 /*
5825                  * size and we should have left via the partial output condition
5826                  * above. One of the trace_seq_* functions is not being used properly.
5827                  * One of the trace_seq_* functions is not used properly.
5828                  */
5829                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5830                           iter->ent->type);
5831         }
5832         trace_access_unlock(iter->cpu_file);
5833         trace_event_read_unlock();
5834
5835         /* Now copy what we have to the user */
5836         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5837         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5838                 trace_seq_init(&iter->seq);
5839
5840         /*
5841          * If there was nothing to send to user, in spite of consuming trace
5842          * entries, go back to wait for more entries.
5843          */
5844         if (sret == -EBUSY)
5845                 goto waitagain;
5846
5847 out:
5848         mutex_unlock(&iter->mutex);
5849
5850         return sret;
5851 }
5852
5853 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5854                                      unsigned int idx)
5855 {
5856         __free_page(spd->pages[idx]);
5857 }
5858
5859 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5860         .can_merge              = 0,
5861         .confirm                = generic_pipe_buf_confirm,
5862         .release                = generic_pipe_buf_release,
5863         .steal                  = generic_pipe_buf_steal,
5864         .get                    = generic_pipe_buf_get,
5865 };
5866
5867 static size_t
5868 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5869 {
5870         size_t count;
5871         int save_len;
5872         int ret;
5873
5874         /* Seq buffer is page-sized, exactly what we need. */
5875         for (;;) {
5876                 save_len = iter->seq.seq.len;
5877                 ret = print_trace_line(iter);
5878
5879                 if (trace_seq_has_overflowed(&iter->seq)) {
5880                         iter->seq.seq.len = save_len;
5881                         break;
5882                 }
5883
5884                 /*
5885                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5886                  * should only be returned if iter->seq overflowed. But check
5887                  * it anyway to be safe.
5888                  */
5889                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5890                         iter->seq.seq.len = save_len;
5891                         break;
5892                 }
5893
5894                 count = trace_seq_used(&iter->seq) - save_len;
5895                 if (rem < count) {
5896                         rem = 0;
5897                         iter->seq.seq.len = save_len;
5898                         break;
5899                 }
5900
5901                 if (ret != TRACE_TYPE_NO_CONSUME)
5902                         trace_consume(iter);
5903                 rem -= count;
5904                 if (!trace_find_next_entry_inc(iter))   {
5905                         rem = 0;
5906                         iter->ent = NULL;
5907                         break;
5908                 }
5909         }
5910
5911         return rem;
5912 }
5913
5914 static ssize_t tracing_splice_read_pipe(struct file *filp,
5915                                         loff_t *ppos,
5916                                         struct pipe_inode_info *pipe,
5917                                         size_t len,
5918                                         unsigned int flags)
5919 {
5920         struct page *pages_def[PIPE_DEF_BUFFERS];
5921         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5922         struct trace_iterator *iter = filp->private_data;
5923         struct splice_pipe_desc spd = {
5924                 .pages          = pages_def,
5925                 .partial        = partial_def,
5926                 .nr_pages       = 0, /* This gets updated below. */
5927                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5928                 .ops            = &tracing_pipe_buf_ops,
5929                 .spd_release    = tracing_spd_release_pipe,
5930         };
5931         ssize_t ret;
5932         size_t rem;
5933         unsigned int i;
5934
5935         if (splice_grow_spd(pipe, &spd))
5936                 return -ENOMEM;
5937
5938         mutex_lock(&iter->mutex);
5939
5940         if (iter->trace->splice_read) {
5941                 ret = iter->trace->splice_read(iter, filp,
5942                                                ppos, pipe, len, flags);
5943                 if (ret)
5944                         goto out_err;
5945         }
5946
5947         ret = tracing_wait_pipe(filp);
5948         if (ret <= 0)
5949                 goto out_err;
5950
5951         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5952                 ret = -EFAULT;
5953                 goto out_err;
5954         }
5955
5956         trace_event_read_lock();
5957         trace_access_lock(iter->cpu_file);
5958
5959         /* Fill as many pages as possible. */
5960         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5961                 spd.pages[i] = alloc_page(GFP_KERNEL);
5962                 if (!spd.pages[i])
5963                         break;
5964
5965                 rem = tracing_fill_pipe_page(rem, iter);
5966
5967                 /* Copy the data into the page, so we can start over. */
5968                 ret = trace_seq_to_buffer(&iter->seq,
5969                                           page_address(spd.pages[i]),
5970                                           trace_seq_used(&iter->seq));
5971                 if (ret < 0) {
5972                         __free_page(spd.pages[i]);
5973                         break;
5974                 }
5975                 spd.partial[i].offset = 0;
5976                 spd.partial[i].len = trace_seq_used(&iter->seq);
5977
5978                 trace_seq_init(&iter->seq);
5979         }
5980
5981         trace_access_unlock(iter->cpu_file);
5982         trace_event_read_unlock();
5983         mutex_unlock(&iter->mutex);
5984
5985         spd.nr_pages = i;
5986
5987         if (i)
5988                 ret = splice_to_pipe(pipe, &spd);
5989         else
5990                 ret = 0;
5991 out:
5992         splice_shrink_spd(&spd);
5993         return ret;
5994
5995 out_err:
5996         mutex_unlock(&iter->mutex);
5997         goto out;
5998 }
5999
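     /*
      * Per-cpu ring buffer size handlers (presumably "buffer_size_kb"
      * in tracefs): reading reports the size in KB ("X" when CPUs
      * differ, plus the expanded size while the buffer is still at its
      * boot-time minimum); writing a KB value resizes the buffer, e.g.:
      *
      *      # echo 10240 > /sys/kernel/tracing/buffer_size_kb
      */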
6000 static ssize_t
6001 tracing_entries_read(struct file *filp, char __user *ubuf,
6002                      size_t cnt, loff_t *ppos)
6003 {
6004         struct inode *inode = file_inode(filp);
6005         struct trace_array *tr = inode->i_private;
6006         int cpu = tracing_get_cpu(inode);
6007         char buf[64];
6008         int r = 0;
6009         ssize_t ret;
6010
6011         mutex_lock(&trace_types_lock);
6012
6013         if (cpu == RING_BUFFER_ALL_CPUS) {
6014                 int cpu, buf_size_same;
6015                 unsigned long size;
6016
6017                 size = 0;
6018                 buf_size_same = 1;
6019                 /* check if all cpu sizes are the same */
6020                 for_each_tracing_cpu(cpu) {
6021                         /* fill in the size from first enabled cpu */
6022                         if (size == 0)
6023                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6024                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6025                                 buf_size_same = 0;
6026                                 break;
6027                         }
6028                 }
6029
6030                 if (buf_size_same) {
6031                         if (!ring_buffer_expanded)
6032                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6033                                             size >> 10,
6034                                             trace_buf_size >> 10);
6035                         else
6036                                 r = sprintf(buf, "%lu\n", size >> 10);
6037                 } else
6038                         r = sprintf(buf, "X\n");
6039         } else
6040                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6041
6042         mutex_unlock(&trace_types_lock);
6043
6044         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6045         return ret;
6046 }
6047
6048 static ssize_t
6049 tracing_entries_write(struct file *filp, const char __user *ubuf,
6050                       size_t cnt, loff_t *ppos)
6051 {
6052         struct inode *inode = file_inode(filp);
6053         struct trace_array *tr = inode->i_private;
6054         unsigned long val;
6055         int ret;
6056
6057         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6058         if (ret)
6059                 return ret;
6060
6061         /* must have at least 1 entry */
6062         if (!val)
6063                 return -EINVAL;
6064
6065         /* value is in KB */
6066         val <<= 10;
6067         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6068         if (ret < 0)
6069                 return ret;
6070
6071         *ppos += cnt;
6072
6073         return cnt;
6074 }
6075
6076 static ssize_t
6077 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6078                                 size_t cnt, loff_t *ppos)
6079 {
6080         struct trace_array *tr = filp->private_data;
6081         char buf[64];
6082         int r, cpu;
6083         unsigned long size = 0, expanded_size = 0;
6084
6085         mutex_lock(&trace_types_lock);
6086         for_each_tracing_cpu(cpu) {
6087                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6088                 if (!ring_buffer_expanded)
6089                         expanded_size += trace_buf_size >> 10;
6090         }
6091         if (ring_buffer_expanded)
6092                 r = sprintf(buf, "%lu\n", size);
6093         else
6094                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6095         mutex_unlock(&trace_types_lock);
6096
6097         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6098 }
6099
6100 static ssize_t
6101 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6102                           size_t cnt, loff_t *ppos)
6103 {
6104         /*
6105          * There is no need to read what the user has written; this function
6106          * just makes sure that there is no error when "echo" is used.
6107          */
6108
6109         *ppos += cnt;
6110
6111         return cnt;
6112 }
6113
6114 static int
6115 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6116 {
6117         struct trace_array *tr = inode->i_private;
6118
6119         /* disable tracing ? */
6120         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6121         /* disable tracing? */
6122         /* resize the ring buffer to 0 */
6123         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6124
6125         trace_array_put(tr);
6126
6127         return 0;
6128 }
6129
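     /*
      * Write handler for user-injected trace messages (presumably the
      * "trace_marker" file).  The text is recorded as a print entry in
      * the ring buffer, e.g.:
      *
      *      # echo "hello from userspace" > /sys/kernel/tracing/trace_marker
      *
      * If the user buffer faults, the string "<faulted>" is recorded
      * instead and -EFAULT is returned.
      */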
6130 static ssize_t
6131 tracing_mark_write(struct file *filp, const char __user *ubuf,
6132                                         size_t cnt, loff_t *fpos)
6133 {
6134         struct trace_array *tr = filp->private_data;
6135         struct ring_buffer_event *event;
6136         struct ring_buffer *buffer;
6137         struct print_entry *entry;
6138         unsigned long irq_flags;
6139         const char faulted[] = "<faulted>";
6140         ssize_t written;
6141         int size;
6142         int len;
6143
6144 /* Used in tracing_mark_raw_write() as well */
6145 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6146
6147         if (tracing_disabled)
6148                 return -EINVAL;
6149
6150         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6151                 return -EINVAL;
6152
6153         if (cnt > TRACE_BUF_SIZE)
6154                 cnt = TRACE_BUF_SIZE;
6155
6156         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6157
6158         local_save_flags(irq_flags);
6159         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6160
6161         /* If less than "<faulted>", then make sure we can still add that */
6162         if (cnt < FAULTED_SIZE)
6163                 size += FAULTED_SIZE - cnt;
6164
6165         buffer = tr->trace_buffer.buffer;
6166         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6167                                             irq_flags, preempt_count());
6168         if (unlikely(!event))
6169                 /* Ring buffer disabled, return as if not open for write */
6170                 return -EBADF;
6171
6172         entry = ring_buffer_event_data(event);
6173         entry->ip = _THIS_IP_;
6174
6175         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6176         if (len) {
6177                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6178                 cnt = FAULTED_SIZE;
6179                 written = -EFAULT;
6180         } else
6181                 written = cnt;
6182         len = cnt;
6183
6184         if (entry->buf[cnt - 1] != '\n') {
6185                 entry->buf[cnt] = '\n';
6186                 entry->buf[cnt + 1] = '\0';
6187         } else
6188                 entry->buf[cnt] = '\0';
6189
6190         __buffer_unlock_commit(buffer, event);
6191
6192         if (written > 0)
6193                 *fpos += written;
6194
6195         return written;
6196 }
6197
6198 /* Limit it for now to 3K (including tag) */
6199 #define RAW_DATA_MAX_SIZE (1024*3)
6200
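     /*
      * Binary counterpart of the marker write above (presumably the
      * "trace_marker_raw" file): the payload must start with an
      * unsigned int tag id (enforced by the sizeof(unsigned int) check
      * below) and may be up to RAW_DATA_MAX_SIZE bytes in total, so that
      * user space tools can decode the raw data by its tag.
      */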
6201 static ssize_t
6202 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6203                                         size_t cnt, loff_t *fpos)
6204 {
6205         struct trace_array *tr = filp->private_data;
6206         struct ring_buffer_event *event;
6207         struct ring_buffer *buffer;
6208         struct raw_data_entry *entry;
6209         const char faulted[] = "<faulted>";
6210         unsigned long irq_flags;
6211         ssize_t written;
6212         int size;
6213         int len;
6214
6215 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6216
6217         if (tracing_disabled)
6218                 return -EINVAL;
6219
6220         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6221                 return -EINVAL;
6222
6223         /* The marker must at least have a tag id */
6224         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6225                 return -EINVAL;
6226
6227         if (cnt > TRACE_BUF_SIZE)
6228                 cnt = TRACE_BUF_SIZE;
6229
6230         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6231
6232         local_save_flags(irq_flags);
6233         size = sizeof(*entry) + cnt;
6234         if (cnt < FAULT_SIZE_ID)
6235                 size += FAULT_SIZE_ID - cnt;
6236
6237         buffer = tr->trace_buffer.buffer;
6238         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6239                                             irq_flags, preempt_count());
6240         if (!event)
6241                 /* Ring buffer disabled, return as if not open for write */
6242                 return -EBADF;
6243
6244         entry = ring_buffer_event_data(event);
6245
6246         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6247         if (len) {
6248                 entry->id = -1;
6249                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6250                 written = -EFAULT;
6251         } else
6252                 written = cnt;
6253
6254         __buffer_unlock_commit(buffer, event);
6255
6256         if (written > 0)
6257                 *fpos += written;
6258
6259         return written;
6260 }
6261
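     /*
      * Trace clock selection (presumably the "trace_clock" file):
      * tracing_clock_show() lists the clocks in trace_clocks[] with the
      * active one in brackets, and writing a clock name switches the
      * timestamp source and resets the buffers, since old and new
      * timestamps are not comparable.  For example, assuming "mono" is
      * among the built-in clocks:
      *
      *      # echo mono > /sys/kernel/tracing/trace_clock
      */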
6262 static int tracing_clock_show(struct seq_file *m, void *v)
6263 {
6264         struct trace_array *tr = m->private;
6265         int i;
6266
6267         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6268                 seq_printf(m,
6269                         "%s%s%s%s", i ? " " : "",
6270                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6271                         i == tr->clock_id ? "]" : "");
6272         seq_putc(m, '\n');
6273
6274         return 0;
6275 }
6276
6277 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6278 {
6279         int i;
6280
6281         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6282                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6283                         break;
6284         }
6285         if (i == ARRAY_SIZE(trace_clocks))
6286                 return -EINVAL;
6287
6288         mutex_lock(&trace_types_lock);
6289
6290         tr->clock_id = i;
6291
6292         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6293
6294         /*
6295          * New clock may not be consistent with the previous clock.
6296          * Reset the buffer so that it doesn't have incomparable timestamps.
6297          */
6298         tracing_reset_online_cpus(&tr->trace_buffer);
6299
6300 #ifdef CONFIG_TRACER_MAX_TRACE
6301         if (tr->max_buffer.buffer)
6302                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6303         tracing_reset_online_cpus(&tr->max_buffer);
6304 #endif
6305
6306         mutex_unlock(&trace_types_lock);
6307
6308         return 0;
6309 }
6310
6311 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6312                                    size_t cnt, loff_t *fpos)
6313 {
6314         struct seq_file *m = filp->private_data;
6315         struct trace_array *tr = m->private;
6316         char buf[64];
6317         const char *clockstr;
6318         int ret;
6319
6320         if (cnt >= sizeof(buf))
6321                 return -EINVAL;
6322
6323         if (copy_from_user(buf, ubuf, cnt))
6324                 return -EFAULT;
6325
6326         buf[cnt] = 0;
6327
6328         clockstr = strstrip(buf);
6329
6330         ret = tracing_set_clock(tr, clockstr);
6331         if (ret)
6332                 return ret;
6333
6334         *fpos += cnt;
6335
6336         return cnt;
6337 }
6338
6339 static int tracing_clock_open(struct inode *inode, struct file *file)
6340 {
6341         struct trace_array *tr = inode->i_private;
6342         int ret;
6343
6344         if (tracing_disabled)
6345                 return -ENODEV;
6346
6347         if (trace_array_get(tr))
6348                 return -ENODEV;
6349
6350         ret = single_open(file, tracing_clock_show, inode->i_private);
6351         if (ret < 0)
6352                 trace_array_put(tr);
6353
6354         return ret;
6355 }
6356
6357 struct ftrace_buffer_info {
6358         struct trace_iterator   iter;
6359         void                    *spare;
6360         unsigned int            spare_cpu;
6361         unsigned int            read;
6362 };
6363
6364 #ifdef CONFIG_TRACER_SNAPSHOT
6365 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6366 {
6367         struct trace_array *tr = inode->i_private;
6368         struct trace_iterator *iter;
6369         struct seq_file *m;
6370         int ret = 0;
6371
6372         if (trace_array_get(tr) < 0)
6373                 return -ENODEV;
6374
6375         if (file->f_mode & FMODE_READ) {
6376                 iter = __tracing_open(inode, file, true);
6377                 if (IS_ERR(iter))
6378                         ret = PTR_ERR(iter);
6379         } else {
6380                 /* Writes still need the seq_file to hold the private data */
6381                 ret = -ENOMEM;
6382                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6383                 if (!m)
6384                         goto out;
6385                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6386                 if (!iter) {
6387                         kfree(m);
6388                         goto out;
6389                 }
6390                 ret = 0;
6391
6392                 iter->tr = tr;
6393                 iter->trace_buffer = &tr->max_buffer;
6394                 iter->cpu_file = tracing_get_cpu(inode);
6395                 m->private = iter;
6396                 file->private_data = m;
6397         }
6398 out:
6399         if (ret < 0)
6400                 trace_array_put(tr);
6401
6402         return ret;
6403 }
6404
6405 static ssize_t
6406 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6407                        loff_t *ppos)
6408 {
6409         struct seq_file *m = filp->private_data;
6410         struct trace_iterator *iter = m->private;
6411         struct trace_array *tr = iter->tr;
6412         unsigned long val;
6413         int ret;
6414
6415         ret = tracing_update_buffers();
6416         if (ret < 0)
6417                 return ret;
6418
6419         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6420         if (ret)
6421                 return ret;
6422
6423         mutex_lock(&trace_types_lock);
6424
6425         if (tr->current_trace->use_max_tr) {
6426                 ret = -EBUSY;
6427                 goto out;
6428         }
6429
6430         switch (val) {
6431         case 0:
6432                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6433                         ret = -EINVAL;
6434                         break;
6435                 }
6436                 if (tr->allocated_snapshot)
6437                         free_snapshot(tr);
6438                 break;
6439         case 1:
6440 /* Only allow per-cpu swap if the ring buffer supports it */
6441 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6442                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6443                         ret = -EINVAL;
6444                         break;
6445                 }
6446 #endif
6447                 if (tr->allocated_snapshot)
6448                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6449                                         &tr->trace_buffer, iter->cpu_file);
6450                 else
6451                         ret = tracing_alloc_snapshot_instance(tr);
6452                 if (ret < 0)
6453                         break;
6454                 local_irq_disable();
6455                 /* Now, we're going to swap */
6456                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6457                         update_max_tr(tr, current, smp_processor_id());
6458                 else
6459                         update_max_tr_single(tr, current, iter->cpu_file);
6460                 local_irq_enable();
6461                 break;
6462         default:
6463                 if (tr->allocated_snapshot) {
6464                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6465                                 tracing_reset_online_cpus(&tr->max_buffer);
6466                         else
6467                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6468                 }
6469                 break;
6470         }
6471
6472         if (ret >= 0) {
6473                 *ppos += cnt;
6474                 ret = cnt;
6475         }
6476 out:
6477         mutex_unlock(&trace_types_lock);
6478         return ret;
6479 }
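
/*
 * Illustrative sketch (not part of this file's build): the snapshot file
 * takes a number, as parsed above.  Writing "1" allocates the max_buffer if
 * needed and swaps it with the live buffer, "0" frees the snapshot (only via
 * the top-level, all-CPUs file), and any other value clears the snapshot
 * buffer.  The path below assumes a common tracefs mount point.
 *
 *	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot now
 *		close(fd);
 *	}
 *
 * Reading the same file afterwards goes through __tracing_open() with the
 * snapshot flag set, so it shows the swapped-out max_buffer contents.
 */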
6480
6481 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6482 {
6483         struct seq_file *m = file->private_data;
6484         int ret;
6485
6486         ret = tracing_release(inode, file);
6487
6488         if (file->f_mode & FMODE_READ)
6489                 return ret;
6490
6491         /* If write only, the seq_file is just a stub */
6492         if (m)
6493                 kfree(m->private);
6494         kfree(m);
6495
6496         return 0;
6497 }
6498
6499 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6500 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6501                                     size_t count, loff_t *ppos);
6502 static int tracing_buffers_release(struct inode *inode, struct file *file);
6503 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6504                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6505
6506 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6507 {
6508         struct ftrace_buffer_info *info;
6509         int ret;
6510
6511         ret = tracing_buffers_open(inode, filp);
6512         if (ret < 0)
6513                 return ret;
6514
6515         info = filp->private_data;
6516
6517         if (info->iter.trace->use_max_tr) {
6518                 tracing_buffers_release(inode, filp);
6519                 return -EBUSY;
6520         }
6521
6522         info->iter.snapshot = true;
6523         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6524
6525         return ret;
6526 }
6527
6528 #endif /* CONFIG_TRACER_SNAPSHOT */
6529
6530
6531 static const struct file_operations tracing_thresh_fops = {
6532         .open           = tracing_open_generic,
6533         .read           = tracing_thresh_read,
6534         .write          = tracing_thresh_write,
6535         .llseek         = generic_file_llseek,
6536 };
6537
6538 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6539 static const struct file_operations tracing_max_lat_fops = {
6540         .open           = tracing_open_generic,
6541         .read           = tracing_max_lat_read,
6542         .write          = tracing_max_lat_write,
6543         .llseek         = generic_file_llseek,
6544 };
6545 #endif
6546
6547 static const struct file_operations set_tracer_fops = {
6548         .open           = tracing_open_generic,
6549         .read           = tracing_set_trace_read,
6550         .write          = tracing_set_trace_write,
6551         .llseek         = generic_file_llseek,
6552 };
6553
6554 static const struct file_operations tracing_pipe_fops = {
6555         .open           = tracing_open_pipe,
6556         .poll           = tracing_poll_pipe,
6557         .read           = tracing_read_pipe,
6558         .splice_read    = tracing_splice_read_pipe,
6559         .release        = tracing_release_pipe,
6560         .llseek         = no_llseek,
6561 };
6562
6563 static const struct file_operations tracing_entries_fops = {
6564         .open           = tracing_open_generic_tr,
6565         .read           = tracing_entries_read,
6566         .write          = tracing_entries_write,
6567         .llseek         = generic_file_llseek,
6568         .release        = tracing_release_generic_tr,
6569 };
6570
6571 static const struct file_operations tracing_total_entries_fops = {
6572         .open           = tracing_open_generic_tr,
6573         .read           = tracing_total_entries_read,
6574         .llseek         = generic_file_llseek,
6575         .release        = tracing_release_generic_tr,
6576 };
6577
6578 static const struct file_operations tracing_free_buffer_fops = {
6579         .open           = tracing_open_generic_tr,
6580         .write          = tracing_free_buffer_write,
6581         .release        = tracing_free_buffer_release,
6582 };
6583
6584 static const struct file_operations tracing_mark_fops = {
6585         .open           = tracing_open_generic_tr,
6586         .write          = tracing_mark_write,
6587         .llseek         = generic_file_llseek,
6588         .release        = tracing_release_generic_tr,
6589 };
6590
6591 static const struct file_operations tracing_mark_raw_fops = {
6592         .open           = tracing_open_generic_tr,
6593         .write          = tracing_mark_raw_write,
6594         .llseek         = generic_file_llseek,
6595         .release        = tracing_release_generic_tr,
6596 };
6597
6598 static const struct file_operations trace_clock_fops = {
6599         .open           = tracing_clock_open,
6600         .read           = seq_read,
6601         .llseek         = seq_lseek,
6602         .release        = tracing_single_release_tr,
6603         .write          = tracing_clock_write,
6604 };
6605
6606 #ifdef CONFIG_TRACER_SNAPSHOT
6607 static const struct file_operations snapshot_fops = {
6608         .open           = tracing_snapshot_open,
6609         .read           = seq_read,
6610         .write          = tracing_snapshot_write,
6611         .llseek         = tracing_lseek,
6612         .release        = tracing_snapshot_release,
6613 };
6614
6615 static const struct file_operations snapshot_raw_fops = {
6616         .open           = snapshot_raw_open,
6617         .read           = tracing_buffers_read,
6618         .release        = tracing_buffers_release,
6619         .splice_read    = tracing_buffers_splice_read,
6620         .llseek         = no_llseek,
6621 };
6622
6623 #endif /* CONFIG_TRACER_SNAPSHOT */
6624
6625 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6626 {
6627         struct trace_array *tr = inode->i_private;
6628         struct ftrace_buffer_info *info;
6629         int ret;
6630
6631         if (tracing_disabled)
6632                 return -ENODEV;
6633
6634         if (trace_array_get(tr) < 0)
6635                 return -ENODEV;
6636
6637         info = kzalloc(sizeof(*info), GFP_KERNEL);
6638         if (!info) {
6639                 trace_array_put(tr);
6640                 return -ENOMEM;
6641         }
6642
6643         mutex_lock(&trace_types_lock);
6644
6645         info->iter.tr           = tr;
6646         info->iter.cpu_file     = tracing_get_cpu(inode);
6647         info->iter.trace        = tr->current_trace;
6648         info->iter.trace_buffer = &tr->trace_buffer;
6649         info->spare             = NULL;
6650         /* Force reading ring buffer for first read */
6651         info->read              = (unsigned int)-1;
6652
6653         filp->private_data = info;
6654
6655         tr->current_trace->ref++;
6656
6657         mutex_unlock(&trace_types_lock);
6658
6659         ret = nonseekable_open(inode, filp);
6660         if (ret < 0)
6661                 trace_array_put(tr);
6662
6663         return ret;
6664 }
6665
6666 static unsigned int
6667 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6668 {
6669         struct ftrace_buffer_info *info = filp->private_data;
6670         struct trace_iterator *iter = &info->iter;
6671
6672         return trace_poll(iter, filp, poll_table);
6673 }
6674
6675 static ssize_t
6676 tracing_buffers_read(struct file *filp, char __user *ubuf,
6677                      size_t count, loff_t *ppos)
6678 {
6679         struct ftrace_buffer_info *info = filp->private_data;
6680         struct trace_iterator *iter = &info->iter;
6681         ssize_t ret = 0;
6682         ssize_t size;
6683
6684         if (!count)
6685                 return 0;
6686
6687 #ifdef CONFIG_TRACER_MAX_TRACE
6688         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6689                 return -EBUSY;
6690 #endif
6691
6692         if (!info->spare) {
6693                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6694                                                           iter->cpu_file);
6695                 if (IS_ERR(info->spare)) {
6696                         ret = PTR_ERR(info->spare);
6697                         info->spare = NULL;
6698                 } else {
6699                         info->spare_cpu = iter->cpu_file;
6700                 }
6701         }
6702         if (!info->spare)
6703                 return ret;
6704
6705         /* Do we have previous read data to read? */
6706         if (info->read < PAGE_SIZE)
6707                 goto read;
6708
6709  again:
6710         trace_access_lock(iter->cpu_file);
6711         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6712                                     &info->spare,
6713                                     count,
6714                                     iter->cpu_file, 0);
6715         trace_access_unlock(iter->cpu_file);
6716
6717         if (ret < 0) {
6718                 if (trace_empty(iter)) {
6719                         if ((filp->f_flags & O_NONBLOCK))
6720                                 return -EAGAIN;
6721
6722                         ret = wait_on_pipe(iter, false);
6723                         if (ret)
6724                                 return ret;
6725
6726                         goto again;
6727                 }
6728                 return 0;
6729         }
6730
6731         info->read = 0;
6732  read:
6733         size = PAGE_SIZE - info->read;
6734         if (size > count)
6735                 size = count;
6736
6737         ret = copy_to_user(ubuf, info->spare + info->read, size);
6738         if (ret == size)
6739                 return -EFAULT;
6740
6741         size -= ret;
6742
6743         *ppos += size;
6744         info->read += size;
6745
6746         return size;
6747 }
6748
6749 static int tracing_buffers_release(struct inode *inode, struct file *file)
6750 {
6751         struct ftrace_buffer_info *info = file->private_data;
6752         struct trace_iterator *iter = &info->iter;
6753
6754         mutex_lock(&trace_types_lock);
6755
6756         iter->tr->current_trace->ref--;
6757
6758         __trace_array_put(iter->tr);
6759
6760         if (info->spare)
6761                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6762                                            info->spare_cpu, info->spare);
6763         kfree(info);
6764
6765         mutex_unlock(&trace_types_lock);
6766
6767         return 0;
6768 }
6769
6770 struct buffer_ref {
6771         struct ring_buffer      *buffer;
6772         void                    *page;
6773         int                     cpu;
6774         refcount_t              refcount;
6775 };
6776
6777 static void buffer_ref_release(struct buffer_ref *ref)
6778 {
6779         if (!refcount_dec_and_test(&ref->refcount))
6780                 return;
6781         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6782         kfree(ref);
6783 }
6784
6785 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6786                                     struct pipe_buffer *buf)
6787 {
6788         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6789
6790         buffer_ref_release(ref);
6791         buf->private = 0;
6792 }
6793
6794 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6795                                 struct pipe_buffer *buf)
6796 {
6797         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6798
6799         if (refcount_read(&ref->refcount) > INT_MAX/2)
6800                 return false;
6801
6802         refcount_inc(&ref->refcount);
6803         return true;
6804 }
6805
6806 /* Pipe buffer operations for a buffer. */
6807 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6808         .can_merge              = 0,
6809         .confirm                = generic_pipe_buf_confirm,
6810         .release                = buffer_pipe_buf_release,
6811         .steal                  = generic_pipe_buf_nosteal,
6812         .get                    = buffer_pipe_buf_get,
6813 };
6814
6815 /*
6816  * Callback from splice_to_pipe(): release any pages left in the spd
6817  * in case we errored out while filling the pipe.
6818  */
6819 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6820 {
6821         struct buffer_ref *ref =
6822                 (struct buffer_ref *)spd->partial[i].private;
6823
6824         buffer_ref_release(ref);
6825         spd->partial[i].private = 0;
6826 }
6827
6828 static ssize_t
6829 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6830                             struct pipe_inode_info *pipe, size_t len,
6831                             unsigned int flags)
6832 {
6833         struct ftrace_buffer_info *info = file->private_data;
6834         struct trace_iterator *iter = &info->iter;
6835         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6836         struct page *pages_def[PIPE_DEF_BUFFERS];
6837         struct splice_pipe_desc spd = {
6838                 .pages          = pages_def,
6839                 .partial        = partial_def,
6840                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6841                 .ops            = &buffer_pipe_buf_ops,
6842                 .spd_release    = buffer_spd_release,
6843         };
6844         struct buffer_ref *ref;
6845         int entries, i;
6846         ssize_t ret = 0;
6847
6848 #ifdef CONFIG_TRACER_MAX_TRACE
6849         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6850                 return -EBUSY;
6851 #endif
6852
6853         if (*ppos & (PAGE_SIZE - 1))
6854                 return -EINVAL;
6855
6856         if (len & (PAGE_SIZE - 1)) {
6857                 if (len < PAGE_SIZE)
6858                         return -EINVAL;
6859                 len &= PAGE_MASK;
6860         }
6861
6862         if (splice_grow_spd(pipe, &spd))
6863                 return -ENOMEM;
6864
6865  again:
6866         trace_access_lock(iter->cpu_file);
6867         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6868
6869         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6870                 struct page *page;
6871                 int r;
6872
6873                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6874                 if (!ref) {
6875                         ret = -ENOMEM;
6876                         break;
6877                 }
6878
6879                 refcount_set(&ref->refcount, 1);
6880                 ref->buffer = iter->trace_buffer->buffer;
6881                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6882                 if (IS_ERR(ref->page)) {
6883                         ret = PTR_ERR(ref->page);
6884                         ref->page = NULL;
6885                         kfree(ref);
6886                         break;
6887                 }
6888                 ref->cpu = iter->cpu_file;
6889
6890                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6891                                           len, iter->cpu_file, 1);
6892                 if (r < 0) {
6893                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6894                                                    ref->page);
6895                         kfree(ref);
6896                         break;
6897                 }
6898
6899                 page = virt_to_page(ref->page);
6900
6901                 spd.pages[i] = page;
6902                 spd.partial[i].len = PAGE_SIZE;
6903                 spd.partial[i].offset = 0;
6904                 spd.partial[i].private = (unsigned long)ref;
6905                 spd.nr_pages++;
6906                 *ppos += PAGE_SIZE;
6907
6908                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6909         }
6910
6911         trace_access_unlock(iter->cpu_file);
6912         spd.nr_pages = i;
6913
6914         /* did we read anything? */
6915         if (!spd.nr_pages) {
6916                 if (ret)
6917                         goto out;
6918
6919                 ret = -EAGAIN;
6920                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6921                         goto out;
6922
6923                 ret = wait_on_pipe(iter, true);
6924                 if (ret)
6925                         goto out;
6926
6927                 goto again;
6928         }
6929
6930         ret = splice_to_pipe(pipe, &spd);
6931 out:
6932         splice_shrink_spd(&spd);
6933
6934         return ret;
6935 }
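
/*
 * Illustrative sketch (not part of this file's build): trace_pipe_raw is
 * meant to be consumed with splice(2) in whole pages, matching the PAGE_SIZE
 * checks in tracing_buffers_splice_read() above.  The snippet assumes
 * _GNU_SOURCE with <fcntl.h>/<unistd.h>, a common tracefs mount point,
 * CPU 0, and a 4096-byte page size.
 *
 *	int raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int out = open("trace.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int pfd[2];
 *	ssize_t n;
 *
 *	pipe(pfd);
 *	n = splice(raw, NULL, pfd[1], NULL, 4096, 0);
 *	if (n > 0)
 *		splice(pfd[0], NULL, out, NULL, n, 0);
 *
 * Each spliced page is a raw ring-buffer page; tools such as trace-cmd
 * consume this binary format.
 */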
6936
6937 static const struct file_operations tracing_buffers_fops = {
6938         .open           = tracing_buffers_open,
6939         .read           = tracing_buffers_read,
6940         .poll           = tracing_buffers_poll,
6941         .release        = tracing_buffers_release,
6942         .splice_read    = tracing_buffers_splice_read,
6943         .llseek         = no_llseek,
6944 };
6945
6946 static ssize_t
6947 tracing_stats_read(struct file *filp, char __user *ubuf,
6948                    size_t count, loff_t *ppos)
6949 {
6950         struct inode *inode = file_inode(filp);
6951         struct trace_array *tr = inode->i_private;
6952         struct trace_buffer *trace_buf = &tr->trace_buffer;
6953         int cpu = tracing_get_cpu(inode);
6954         struct trace_seq *s;
6955         unsigned long cnt;
6956         unsigned long long t;
6957         unsigned long usec_rem;
6958
6959         s = kmalloc(sizeof(*s), GFP_KERNEL);
6960         if (!s)
6961                 return -ENOMEM;
6962
6963         trace_seq_init(s);
6964
6965         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6966         trace_seq_printf(s, "entries: %ld\n", cnt);
6967
6968         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6969         trace_seq_printf(s, "overrun: %ld\n", cnt);
6970
6971         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6972         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6973
6974         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6975         trace_seq_printf(s, "bytes: %ld\n", cnt);
6976
6977         if (trace_clocks[tr->clock_id].in_ns) {
6978                 /* local or global for trace_clock */
6979                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6980                 usec_rem = do_div(t, USEC_PER_SEC);
6981                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6982                                                                 t, usec_rem);
6983
6984                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6985                 usec_rem = do_div(t, USEC_PER_SEC);
6986                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6987         } else {
6988                 /* counter or tsc mode for trace_clock */
6989                 trace_seq_printf(s, "oldest event ts: %llu\n",
6990                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6991
6992                 trace_seq_printf(s, "now ts: %llu\n",
6993                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6994         }
6995
6996         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6997         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6998
6999         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7000         trace_seq_printf(s, "read events: %ld\n", cnt);
7001
7002         count = simple_read_from_buffer(ubuf, count, ppos,
7003                                         s->buffer, trace_seq_used(s));
7004
7005         kfree(s);
7006
7007         return count;
7008 }
7009
7010 static const struct file_operations tracing_stats_fops = {
7011         .open           = tracing_open_generic_tr,
7012         .read           = tracing_stats_read,
7013         .llseek         = generic_file_llseek,
7014         .release        = tracing_release_generic_tr,
7015 };
7016
7017 #ifdef CONFIG_DYNAMIC_FTRACE
7018
7019 static ssize_t
7020 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7021                   size_t cnt, loff_t *ppos)
7022 {
7023         unsigned long *p = filp->private_data;
7024         char buf[64]; /* Not too big for a shallow stack */
7025         int r;
7026
7027         r = scnprintf(buf, 63, "%ld", *p);
7028         buf[r++] = '\n';
7029
7030         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7031 }
7032
7033 static const struct file_operations tracing_dyn_info_fops = {
7034         .open           = tracing_open_generic,
7035         .read           = tracing_read_dyn_info,
7036         .llseek         = generic_file_llseek,
7037 };
7038 #endif /* CONFIG_DYNAMIC_FTRACE */
7039
7040 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7041 static void
7042 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7043                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7044                 void *data)
7045 {
7046         tracing_snapshot_instance(tr);
7047 }
7048
7049 static void
7050 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7051                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7052                       void *data)
7053 {
7054         struct ftrace_func_mapper *mapper = data;
7055         long *count = NULL;
7056
7057         if (mapper)
7058                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7059
7060         if (count) {
7061
7062                 if (*count <= 0)
7063                         return;
7064
7065                 (*count)--;
7066         }
7067
7068         tracing_snapshot_instance(tr);
7069 }
7070
7071 static int
7072 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7073                       struct ftrace_probe_ops *ops, void *data)
7074 {
7075         struct ftrace_func_mapper *mapper = data;
7076         long *count = NULL;
7077
7078         seq_printf(m, "%ps:", (void *)ip);
7079
7080         seq_puts(m, "snapshot");
7081
7082         if (mapper)
7083                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7084
7085         if (count)
7086                 seq_printf(m, ":count=%ld\n", *count);
7087         else
7088                 seq_puts(m, ":unlimited\n");
7089
7090         return 0;
7091 }
7092
7093 static int
7094 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7095                      unsigned long ip, void *init_data, void **data)
7096 {
7097         struct ftrace_func_mapper *mapper = *data;
7098
7099         if (!mapper) {
7100                 mapper = allocate_ftrace_func_mapper();
7101                 if (!mapper)
7102                         return -ENOMEM;
7103                 *data = mapper;
7104         }
7105
7106         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7107 }
7108
7109 static void
7110 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7111                      unsigned long ip, void *data)
7112 {
7113         struct ftrace_func_mapper *mapper = data;
7114
7115         if (!ip) {
7116                 if (!mapper)
7117                         return;
7118                 free_ftrace_func_mapper(mapper, NULL);
7119                 return;
7120         }
7121
7122         ftrace_func_mapper_remove_ip(mapper, ip);
7123 }
7124
7125 static struct ftrace_probe_ops snapshot_probe_ops = {
7126         .func                   = ftrace_snapshot,
7127         .print                  = ftrace_snapshot_print,
7128 };
7129
7130 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7131         .func                   = ftrace_count_snapshot,
7132         .print                  = ftrace_snapshot_print,
7133         .init                   = ftrace_snapshot_init,
7134         .free                   = ftrace_snapshot_free,
7135 };
7136
7137 static int
7138 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7139                                char *glob, char *cmd, char *param, int enable)
7140 {
7141         struct ftrace_probe_ops *ops;
7142         void *count = (void *)-1;
7143         char *number;
7144         int ret;
7145
7146         if (!tr)
7147                 return -ENODEV;
7148
7149         /* hash funcs only work with set_ftrace_filter */
7150         if (!enable)
7151                 return -EINVAL;
7152
7153         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7154
7155         if (glob[0] == '!')
7156                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7157
7158         if (!param)
7159                 goto out_reg;
7160
7161         number = strsep(&param, ":");
7162
7163         if (!strlen(number))
7164                 goto out_reg;
7165
7166         /*
7167          * We use the callback data field (which is a pointer)
7168          * as our counter.
7169          */
7170         ret = kstrtoul(number, 0, (unsigned long *)&count);
7171         if (ret)
7172                 return ret;
7173
7174  out_reg:
7175         ret = tracing_alloc_snapshot_instance(tr);
7176         if (ret < 0)
7177                 goto out;
7178
7179         ret = register_ftrace_function_probe(glob, tr, ops, count);
7180
7181  out:
7182         return ret < 0 ? ret : 0;
7183 }
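
/*
 * Illustrative usage (not part of this file's build): the callback above
 * implements the "snapshot" command of set_ftrace_filter.  Strings written
 * to that file have the form
 *
 *	<function>:snapshot		- snapshot on every hit (unlimited)
 *	<function>:snapshot:<count>	- snapshot on the first <count> hits
 *	!<function>:snapshot		- remove the probe again
 *
 * A minimal C sketch, assuming a common tracefs mount point and a real
 * function name in place of the "<function>" placeholder:
 *
 *	int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		const char cmd[] = "<function>:snapshot:1\n";
 *
 *		write(fd, cmd, sizeof(cmd) - 1);
 *		close(fd);
 *	}
 */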
7184
7185 static struct ftrace_func_command ftrace_snapshot_cmd = {
7186         .name                   = "snapshot",
7187         .func                   = ftrace_trace_snapshot_callback,
7188 };
7189
7190 static __init int register_snapshot_cmd(void)
7191 {
7192         return register_ftrace_command(&ftrace_snapshot_cmd);
7193 }
7194 #else
7195 static inline __init int register_snapshot_cmd(void) { return 0; }
7196 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7197
7198 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7199 {
7200         if (WARN_ON(!tr->dir))
7201                 return ERR_PTR(-ENODEV);
7202
7203         /* Top directory uses NULL as the parent */
7204         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7205                 return NULL;
7206
7207         /* All sub buffers have a descriptor */
7208         return tr->dir;
7209 }
7210
7211 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7212 {
7213         struct dentry *d_tracer;
7214
7215         if (tr->percpu_dir)
7216                 return tr->percpu_dir;
7217
7218         d_tracer = tracing_get_dentry(tr);
7219         if (IS_ERR(d_tracer))
7220                 return NULL;
7221
7222         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7223
7224         WARN_ONCE(!tr->percpu_dir,
7225                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7226
7227         return tr->percpu_dir;
7228 }
7229
7230 static struct dentry *
7231 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7232                       void *data, long cpu, const struct file_operations *fops)
7233 {
7234         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7235
7236         if (ret) /* See tracing_get_cpu() */
7237                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7238         return ret;
7239 }
7240
7241 static void
7242 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7243 {
7244         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7245         struct dentry *d_cpu;
7246         char cpu_dir[30]; /* 30 characters should be more than enough */
7247
7248         if (!d_percpu)
7249                 return;
7250
7251         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7252         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7253         if (!d_cpu) {
7254                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7255                 return;
7256         }
7257
7258         /* per cpu trace_pipe */
7259         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7260                                 tr, cpu, &tracing_pipe_fops);
7261
7262         /* per cpu trace */
7263         trace_create_cpu_file("trace", 0644, d_cpu,
7264                                 tr, cpu, &tracing_fops);
7265
7266         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7267                                 tr, cpu, &tracing_buffers_fops);
7268
7269         trace_create_cpu_file("stats", 0444, d_cpu,
7270                                 tr, cpu, &tracing_stats_fops);
7271
7272         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7273                                 tr, cpu, &tracing_entries_fops);
7274
7275 #ifdef CONFIG_TRACER_SNAPSHOT
7276         trace_create_cpu_file("snapshot", 0644, d_cpu,
7277                                 tr, cpu, &snapshot_fops);
7278
7279         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7280                                 tr, cpu, &snapshot_raw_fops);
7281 #endif
7282 }
7283
7284 #ifdef CONFIG_FTRACE_SELFTEST
7285 /* Let selftest have access to static functions in this file */
7286 #include "trace_selftest.c"
7287 #endif
7288
7289 static ssize_t
7290 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7291                         loff_t *ppos)
7292 {
7293         struct trace_option_dentry *topt = filp->private_data;
7294         char *buf;
7295
7296         if (topt->flags->val & topt->opt->bit)
7297                 buf = "1\n";
7298         else
7299                 buf = "0\n";
7300
7301         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7302 }
7303
7304 static ssize_t
7305 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7306                          loff_t *ppos)
7307 {
7308         struct trace_option_dentry *topt = filp->private_data;
7309         unsigned long val;
7310         int ret;
7311
7312         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7313         if (ret)
7314                 return ret;
7315
7316         if (val != 0 && val != 1)
7317                 return -EINVAL;
7318
7319         if (!!(topt->flags->val & topt->opt->bit) != val) {
7320                 mutex_lock(&trace_types_lock);
7321                 ret = __set_tracer_option(topt->tr, topt->flags,
7322                                           topt->opt, !val);
7323                 mutex_unlock(&trace_types_lock);
7324                 if (ret)
7325                         return ret;
7326         }
7327
7328         *ppos += cnt;
7329
7330         return cnt;
7331 }
7332
7333
7334 static const struct file_operations trace_options_fops = {
7335         .open = tracing_open_generic,
7336         .read = trace_options_read,
7337         .write = trace_options_write,
7338         .llseek = generic_file_llseek,
7339 };
7340
7341 /*
7342  * In order to pass in both the trace_array descriptor and the index
7343  * to the flag that the trace option file represents, the trace_array
7344  * has a character array of trace_flags_index[], which holds the index
7345  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7346  * The address of this character array is passed to the flag option file
7347  * read/write callbacks.
7348  *
7349  * In order to extract both the index and the trace_array descriptor,
7350  * get_tr_index() uses the following algorithm.
7351  *
7352  *   idx = *ptr;
7353  *
7354  * Since the pointer points at its own index entry (and index[i] == i),
7355  * dereferencing it yields the index.
7356  *
7357  * To get the trace_array descriptor, subtract that index from the
7358  * pointer, which lands on the start of the index array itself:
7359  *
7360  *   ptr - idx == &index[0]
7361  *
7362  * Then a simple container_of() from that pointer gets us to the
7363  * trace_array descriptor.
7364  */
7365 static void get_tr_index(void *data, struct trace_array **ptr,
7366                          unsigned int *pindex)
7367 {
7368         *pindex = *(unsigned char *)data;
7369
7370         *ptr = container_of(data - *pindex, struct trace_array,
7371                             trace_flags_index);
7372 }
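
/*
 * Worked example of the scheme described above (values are illustrative):
 * if a core option file was created with data = &tr->trace_flags_index[3],
 * then
 *
 *	idx = *(unsigned char *)data;	// == 3, because index[3] == 3
 *	data - idx == &tr->trace_flags_index[0]
 *
 * and container_of() on that address recovers the enclosing trace_array.
 */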
7373
7374 static ssize_t
7375 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7376                         loff_t *ppos)
7377 {
7378         void *tr_index = filp->private_data;
7379         struct trace_array *tr;
7380         unsigned int index;
7381         char *buf;
7382
7383         get_tr_index(tr_index, &tr, &index);
7384
7385         if (tr->trace_flags & (1 << index))
7386                 buf = "1\n";
7387         else
7388                 buf = "0\n";
7389
7390         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7391 }
7392
7393 static ssize_t
7394 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7395                          loff_t *ppos)
7396 {
7397         void *tr_index = filp->private_data;
7398         struct trace_array *tr;
7399         unsigned int index;
7400         unsigned long val;
7401         int ret;
7402
7403         get_tr_index(tr_index, &tr, &index);
7404
7405         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7406         if (ret)
7407                 return ret;
7408
7409         if (val != 0 && val != 1)
7410                 return -EINVAL;
7411
7412         mutex_lock(&event_mutex);
7413         mutex_lock(&trace_types_lock);
7414         ret = set_tracer_flag(tr, 1 << index, val);
7415         mutex_unlock(&trace_types_lock);
7416         mutex_unlock(&event_mutex);
7417
7418         if (ret < 0)
7419                 return ret;
7420
7421         *ppos += cnt;
7422
7423         return cnt;
7424 }
7425
7426 static const struct file_operations trace_options_core_fops = {
7427         .open = tracing_open_generic,
7428         .read = trace_options_core_read,
7429         .write = trace_options_core_write,
7430         .llseek = generic_file_llseek,
7431 };
7432
7433 struct dentry *trace_create_file(const char *name,
7434                                  umode_t mode,
7435                                  struct dentry *parent,
7436                                  void *data,
7437                                  const struct file_operations *fops)
7438 {
7439         struct dentry *ret;
7440
7441         ret = tracefs_create_file(name, mode, parent, data, fops);
7442         if (!ret)
7443                 pr_warn("Could not create tracefs '%s' entry\n", name);
7444
7445         return ret;
7446 }
7447
7448
7449 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7450 {
7451         struct dentry *d_tracer;
7452
7453         if (tr->options)
7454                 return tr->options;
7455
7456         d_tracer = tracing_get_dentry(tr);
7457         if (IS_ERR(d_tracer))
7458                 return NULL;
7459
7460         tr->options = tracefs_create_dir("options", d_tracer);
7461         if (!tr->options) {
7462                 pr_warn("Could not create tracefs directory 'options'\n");
7463                 return NULL;
7464         }
7465
7466         return tr->options;
7467 }
7468
7469 static void
7470 create_trace_option_file(struct trace_array *tr,
7471                          struct trace_option_dentry *topt,
7472                          struct tracer_flags *flags,
7473                          struct tracer_opt *opt)
7474 {
7475         struct dentry *t_options;
7476
7477         t_options = trace_options_init_dentry(tr);
7478         if (!t_options)
7479                 return;
7480
7481         topt->flags = flags;
7482         topt->opt = opt;
7483         topt->tr = tr;
7484
7485         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7486                                     &trace_options_fops);
7487
7488 }
7489
7490 static void
7491 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7492 {
7493         struct trace_option_dentry *topts;
7494         struct trace_options *tr_topts;
7495         struct tracer_flags *flags;
7496         struct tracer_opt *opts;
7497         int cnt;
7498         int i;
7499
7500         if (!tracer)
7501                 return;
7502
7503         flags = tracer->flags;
7504
7505         if (!flags || !flags->opts)
7506                 return;
7507
7508         /*
7509          * If this is an instance, only create flags for tracers
7510          * the instance may have.
7511          */
7512         if (!trace_ok_for_array(tracer, tr))
7513                 return;
7514
7515         for (i = 0; i < tr->nr_topts; i++) {
7516                 /* Make sure there are no duplicate flags. */
7517                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7518                         return;
7519         }
7520
7521         opts = flags->opts;
7522
7523         for (cnt = 0; opts[cnt].name; cnt++)
7524                 ;
7525
7526         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7527         if (!topts)
7528                 return;
7529
7530         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7531                             GFP_KERNEL);
7532         if (!tr_topts) {
7533                 kfree(topts);
7534                 return;
7535         }
7536
7537         tr->topts = tr_topts;
7538         tr->topts[tr->nr_topts].tracer = tracer;
7539         tr->topts[tr->nr_topts].topts = topts;
7540         tr->nr_topts++;
7541
7542         for (cnt = 0; opts[cnt].name; cnt++) {
7543                 create_trace_option_file(tr, &topts[cnt], flags,
7544                                          &opts[cnt]);
7545                 WARN_ONCE(topts[cnt].entry == NULL,
7546                           "Failed to create trace option: %s",
7547                           opts[cnt].name);
7548         }
7549 }
7550
7551 static struct dentry *
7552 create_trace_option_core_file(struct trace_array *tr,
7553                               const char *option, long index)
7554 {
7555         struct dentry *t_options;
7556
7557         t_options = trace_options_init_dentry(tr);
7558         if (!t_options)
7559                 return NULL;
7560
7561         return trace_create_file(option, 0644, t_options,
7562                                  (void *)&tr->trace_flags_index[index],
7563                                  &trace_options_core_fops);
7564 }
7565
7566 static void create_trace_options_dir(struct trace_array *tr)
7567 {
7568         struct dentry *t_options;
7569         bool top_level = tr == &global_trace;
7570         int i;
7571
7572         t_options = trace_options_init_dentry(tr);
7573         if (!t_options)
7574                 return;
7575
7576         for (i = 0; trace_options[i]; i++) {
7577                 if (top_level ||
7578                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7579                         create_trace_option_core_file(tr, trace_options[i], i);
7580         }
7581 }
7582
7583 static ssize_t
7584 rb_simple_read(struct file *filp, char __user *ubuf,
7585                size_t cnt, loff_t *ppos)
7586 {
7587         struct trace_array *tr = filp->private_data;
7588         char buf[64];
7589         int r;
7590
7591         r = tracer_tracing_is_on(tr);
7592         r = sprintf(buf, "%d\n", r);
7593
7594         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7595 }
7596
7597 static ssize_t
7598 rb_simple_write(struct file *filp, const char __user *ubuf,
7599                 size_t cnt, loff_t *ppos)
7600 {
7601         struct trace_array *tr = filp->private_data;
7602         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7603         unsigned long val;
7604         int ret;
7605
7606         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7607         if (ret)
7608                 return ret;
7609
7610         if (buffer) {
7611                 mutex_lock(&trace_types_lock);
7612                 if (!!val == tracer_tracing_is_on(tr)) {
7613                         val = 0; /* do nothing */
7614                 } else if (val) {
7615                         tracer_tracing_on(tr);
7616                         if (tr->current_trace->start)
7617                                 tr->current_trace->start(tr);
7618                 } else {
7619                         tracer_tracing_off(tr);
7620                         if (tr->current_trace->stop)
7621                                 tr->current_trace->stop(tr);
7622                 }
7623                 mutex_unlock(&trace_types_lock);
7624         }
7625
7626         (*ppos)++;
7627
7628         return cnt;
7629 }
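
/*
 * Illustrative sketch (not part of this file's build): the tracing_on file
 * wraps tracer_tracing_on()/off() and also calls the current tracer's
 * start/stop hooks, as seen above.  The path below assumes a common tracefs
 * mount point and the usual <fcntl.h>/<unistd.h> includes.
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// start recording
 *		// ... run the workload of interest ...
 *		write(fd, "0", 1);	// stop recording so the trace can be read
 *		close(fd);
 *	}
 */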
7630
7631 static const struct file_operations rb_simple_fops = {
7632         .open           = tracing_open_generic_tr,
7633         .read           = rb_simple_read,
7634         .write          = rb_simple_write,
7635         .release        = tracing_release_generic_tr,
7636         .llseek         = default_llseek,
7637 };
7638
7639 struct dentry *trace_instance_dir;
7640
7641 static void
7642 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7643
7644 static int
7645 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7646 {
7647         enum ring_buffer_flags rb_flags;
7648
7649         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7650
7651         buf->tr = tr;
7652
7653         buf->buffer = ring_buffer_alloc(size, rb_flags);
7654         if (!buf->buffer)
7655                 return -ENOMEM;
7656
7657         buf->data = alloc_percpu(struct trace_array_cpu);
7658         if (!buf->data) {
7659                 ring_buffer_free(buf->buffer);
7660                 buf->buffer = NULL;
7661                 return -ENOMEM;
7662         }
7663
7664         /* Allocate the first page for all buffers */
7665         set_buffer_entries(&tr->trace_buffer,
7666                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7667
7668         return 0;
7669 }
7670
7671 static int allocate_trace_buffers(struct trace_array *tr, int size)
7672 {
7673         int ret;
7674
7675         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7676         if (ret)
7677                 return ret;
7678
7679 #ifdef CONFIG_TRACER_MAX_TRACE
7680         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7681                                     allocate_snapshot ? size : 1);
7682         if (WARN_ON(ret)) {
7683                 ring_buffer_free(tr->trace_buffer.buffer);
7684                 tr->trace_buffer.buffer = NULL;
7685                 free_percpu(tr->trace_buffer.data);
7686                 tr->trace_buffer.data = NULL;
7687                 return -ENOMEM;
7688         }
7689         tr->allocated_snapshot = allocate_snapshot;
7690
7691         /*
7692          * Only the top level trace array gets its snapshot allocated
7693          * from the kernel command line.
7694          */
7695         allocate_snapshot = false;
7696 #endif
7697
7698         /*
7699          * Because of the way alloc_percpu() works on x86_64, we need to
7700          * synchronize the pgd of all the page tables; otherwise a trace
7701          * event that fires inside an x86_64 page fault handler may touch
7702          * alloc_percpu()'d memory whose mapping is not yet present in the
7703          * current pgd, faulting recursively. The same caution applies to
7704          * every other alloc_percpu() and vmalloc() call in tracing, since
7705          * any of them might be touched from within a page fault trace
7706          * event.
7707          */
7708         vmalloc_sync_mappings();
7709
7710         return 0;
7711 }
7712
7713 static void free_trace_buffer(struct trace_buffer *buf)
7714 {
7715         if (buf->buffer) {
7716                 ring_buffer_free(buf->buffer);
7717                 buf->buffer = NULL;
7718                 free_percpu(buf->data);
7719                 buf->data = NULL;
7720         }
7721 }
7722
7723 static void free_trace_buffers(struct trace_array *tr)
7724 {
7725         if (!tr)
7726                 return;
7727
7728         free_trace_buffer(&tr->trace_buffer);
7729
7730 #ifdef CONFIG_TRACER_MAX_TRACE
7731         free_trace_buffer(&tr->max_buffer);
7732 #endif
7733 }
7734
7735 static void init_trace_flags_index(struct trace_array *tr)
7736 {
7737         int i;
7738
7739         /* Used by the trace options files */
7740         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7741                 tr->trace_flags_index[i] = i;
7742 }
7743
7744 static void __update_tracer_options(struct trace_array *tr)
7745 {
7746         struct tracer *t;
7747
7748         for (t = trace_types; t; t = t->next)
7749                 add_tracer_options(tr, t);
7750 }
7751
7752 static void update_tracer_options(struct trace_array *tr)
7753 {
7754         mutex_lock(&trace_types_lock);
7755         tracer_options_updated = true;
7756         __update_tracer_options(tr);
7757         mutex_unlock(&trace_types_lock);
7758 }
7759
7760 static int instance_mkdir(const char *name)
7761 {
7762         struct trace_array *tr;
7763         int ret;
7764
7765         mutex_lock(&event_mutex);
7766         mutex_lock(&trace_types_lock);
7767
7768         ret = -EEXIST;
7769         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7770                 if (tr->name && strcmp(tr->name, name) == 0)
7771                         goto out_unlock;
7772         }
7773
7774         ret = -ENOMEM;
7775         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7776         if (!tr)
7777                 goto out_unlock;
7778
7779         tr->name = kstrdup(name, GFP_KERNEL);
7780         if (!tr->name)
7781                 goto out_free_tr;
7782
7783         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7784                 goto out_free_tr;
7785
7786         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7787
7788         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7789
7790         raw_spin_lock_init(&tr->start_lock);
7791
7792         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7793
7794         tr->current_trace = &nop_trace;
7795
7796         INIT_LIST_HEAD(&tr->systems);
7797         INIT_LIST_HEAD(&tr->events);
7798
7799         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7800                 goto out_free_tr;
7801
7802         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7803         if (!tr->dir)
7804                 goto out_free_tr;
7805
7806         ret = event_trace_add_tracer(tr->dir, tr);
7807         if (ret) {
7808                 tracefs_remove_recursive(tr->dir);
7809                 goto out_free_tr;
7810         }
7811
7812         ftrace_init_trace_array(tr);
7813
7814         init_tracer_tracefs(tr, tr->dir);
7815         init_trace_flags_index(tr);
7816         __update_tracer_options(tr);
7817
7818         list_add(&tr->list, &ftrace_trace_arrays);
7819
7820         mutex_unlock(&trace_types_lock);
7821         mutex_unlock(&event_mutex);
7822
7823         return 0;
7824
7825  out_free_tr:
7826         free_trace_buffers(tr);
7827         free_cpumask_var(tr->tracing_cpumask);
7828         kfree(tr->name);
7829         kfree(tr);
7830
7831  out_unlock:
7832         mutex_unlock(&trace_types_lock);
7833         mutex_unlock(&event_mutex);
7834
7835         return ret;
7836
7837 }
7838
7839 static int instance_rmdir(const char *name)
7840 {
7841         struct trace_array *tr;
7842         int found = 0;
7843         int ret;
7844         int i;
7845
7846         mutex_lock(&event_mutex);
7847         mutex_lock(&trace_types_lock);
7848
7849         ret = -ENODEV;
7850         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7851                 if (tr->name && strcmp(tr->name, name) == 0) {
7852                         found = 1;
7853                         break;
7854                 }
7855         }
7856         if (!found)
7857                 goto out_unlock;
7858
7859         ret = -EBUSY;
7860         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7861                 goto out_unlock;
7862
7863         list_del(&tr->list);
7864
7865         /* Disable all the flags that were enabled coming in */
7866         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7867                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7868                         set_tracer_flag(tr, 1 << i, 0);
7869         }
7870
7871         tracing_set_nop(tr);
7872         clear_ftrace_function_probes(tr);
7873         event_trace_del_tracer(tr);
7874         ftrace_clear_pids(tr);
7875         ftrace_destroy_function_files(tr);
7876         tracefs_remove_recursive(tr->dir);
7877         free_trace_buffers(tr);
7878
7879         for (i = 0; i < tr->nr_topts; i++) {
7880                 kfree(tr->topts[i].topts);
7881         }
7882         kfree(tr->topts);
7883
7884         free_cpumask_var(tr->tracing_cpumask);
7885         kfree(tr->name);
7886         kfree(tr);
7887
7888         ret = 0;
7889
7890  out_unlock:
7891         mutex_unlock(&trace_types_lock);
7892         mutex_unlock(&event_mutex);
7893
7894         return ret;
7895 }
7896
7897 static __init void create_trace_instances(struct dentry *d_tracer)
7898 {
7899         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7900                                                          instance_mkdir,
7901                                                          instance_rmdir);
7902         if (WARN_ON(!trace_instance_dir))
7903                 return;
7904 }
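
/*
 * Illustrative sketch (not part of this file's build): the callbacks
 * registered here let a plain mkdir()/rmdir() in the instances directory
 * create and destroy a complete trace_array, with its own buffers and
 * tracefs files.  The path and instance name below are assumptions.
 *
 *	mkdir("/sys/kernel/debug/tracing/instances/foo", 0755);
 *	// ... use .../instances/foo/trace, trace_pipe, tracing_on, ...
 *	rmdir("/sys/kernel/debug/tracing/instances/foo");
 *
 * instance_rmdir() refuses with -EBUSY while the instance or its current
 * tracer still has references (see the tr->ref check above).
 */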
7905
7906 static void
7907 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7908 {
7909         int cpu;
7910
7911         trace_create_file("available_tracers", 0444, d_tracer,
7912                         tr, &show_traces_fops);
7913
7914         trace_create_file("current_tracer", 0644, d_tracer,
7915                         tr, &set_tracer_fops);
7916
7917         trace_create_file("tracing_cpumask", 0644, d_tracer,
7918                           tr, &tracing_cpumask_fops);
7919
7920         trace_create_file("trace_options", 0644, d_tracer,
7921                           tr, &tracing_iter_fops);
7922
7923         trace_create_file("trace", 0644, d_tracer,
7924                           tr, &tracing_fops);
7925
7926         trace_create_file("trace_pipe", 0444, d_tracer,
7927                           tr, &tracing_pipe_fops);
7928
7929         trace_create_file("buffer_size_kb", 0644, d_tracer,
7930                           tr, &tracing_entries_fops);
7931
7932         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7933                           tr, &tracing_total_entries_fops);
7934
7935         trace_create_file("free_buffer", 0200, d_tracer,
7936                           tr, &tracing_free_buffer_fops);
7937
7938         trace_create_file("trace_marker", 0220, d_tracer,
7939                           tr, &tracing_mark_fops);
7940
7941         trace_create_file("trace_marker_raw", 0220, d_tracer,
7942                           tr, &tracing_mark_raw_fops);
7943
7944         trace_create_file("trace_clock", 0644, d_tracer, tr,
7945                           &trace_clock_fops);
7946
7947         trace_create_file("tracing_on", 0644, d_tracer,
7948                           tr, &rb_simple_fops);
7949
7950         create_trace_options_dir(tr);
7951
7952 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7953         trace_create_file("tracing_max_latency", 0644, d_tracer,
7954                         &tr->max_latency, &tracing_max_lat_fops);
7955 #endif
7956
7957         if (ftrace_create_function_files(tr, d_tracer))
7958                 WARN(1, "Could not allocate function filter files");
7959
7960 #ifdef CONFIG_TRACER_SNAPSHOT
7961         trace_create_file("snapshot", 0644, d_tracer,
7962                           tr, &snapshot_fops);
7963 #endif
7964
7965         for_each_tracing_cpu(cpu)
7966                 tracing_init_tracefs_percpu(tr, cpu);
7967
7968         ftrace_init_tracefs(tr, d_tracer);
7969 }
7970
7971 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7972 {
7973         struct vfsmount *mnt;
7974         struct file_system_type *type;
7975
7976         /*
7977          * To maintain backward compatibility for tools that mount
7978          * debugfs to get to the tracing facility, tracefs is automatically
7979          * mounted to the debugfs/tracing directory.
7980          */
7981         type = get_fs_type("tracefs");
7982         if (!type)
7983                 return NULL;
7984         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7985         put_filesystem(type);
7986         if (IS_ERR(mnt))
7987                 return NULL;
7988         mntget(mnt);
7989
7990         return mnt;
7991 }
7992
7993 /**
7994  * tracing_init_dentry - initialize top level trace array
7995  *
7996  * This is called when creating files or directories in the tracing
7997  * directory. It is called via fs_initcall() by the boot-up code
7998  * and is expected to return the dentry of the top level tracing directory.
7999  */
8000 struct dentry *tracing_init_dentry(void)
8001 {
8002         struct trace_array *tr = &global_trace;
8003
8004         /* The top level trace array uses NULL as parent */
8005         if (tr->dir)
8006                 return NULL;
8007
8008         if (WARN_ON(!tracefs_initialized()) ||
8009                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8010                  WARN_ON(!debugfs_initialized())))
8011                 return ERR_PTR(-ENODEV);
8012
8013         /*
8014          * As there may still be users that expect the tracing
8015          * files to exist in debugfs/tracing, we must automount
8016          * the tracefs file system there, so older tools still
8017          * work with the newer kernel.
8018          */
8019         tr->dir = debugfs_create_automount("tracing", NULL,
8020                                            trace_automount, NULL);
8021         if (!tr->dir) {
8022                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8023                 return ERR_PTR(-ENOMEM);
8024         }
8025
8026         return NULL;
8027 }
8028
8029 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8030 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8031
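/*
 * Register the kernel's built-in eval maps (the TRACE_DEFINE_ENUM() and
 * TRACE_DEFINE_SIZEOF() translations) that the linker gathered between
 * __start_ftrace_eval_maps and __stop_ftrace_eval_maps, so event format
 * strings can be printed with symbolic names instead of raw values.
 */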
8032 static void __init trace_eval_init(void)
8033 {
8034         int len;
8035
8036         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8037         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8038 }
8039
8040 #ifdef CONFIG_MODULES
8041 static void trace_module_add_evals(struct module *mod)
8042 {
8043         if (!mod->num_trace_evals)
8044                 return;
8045
8046         /*
8047          * Modules with bad taint do not have events created; do
8048          * not bother with their eval (enum) maps either.
8049          */
8050         if (trace_module_has_bad_taint(mod))
8051                 return;
8052
8053         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8054 }
8055
8056 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
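/*
 * On module unload, unlink and free the block of eval maps that was
 * saved for this module. The trace_eval_maps list mixes head and tail
 * items, so trace_eval_jmp_to_tail() skips from a block's head to its
 * tail before following the next pointer.
 */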
8057 static void trace_module_remove_evals(struct module *mod)
8058 {
8059         union trace_eval_map_item *map;
8060         union trace_eval_map_item **last = &trace_eval_maps;
8061
8062         if (!mod->num_trace_evals)
8063                 return;
8064
8065         mutex_lock(&trace_eval_mutex);
8066
8067         map = trace_eval_maps;
8068
8069         while (map) {
8070                 if (map->head.mod == mod)
8071                         break;
8072                 map = trace_eval_jmp_to_tail(map);
8073                 last = &map->tail.next;
8074                 map = map->tail.next;
8075         }
8076         if (!map)
8077                 goto out;
8078
8079         *last = trace_eval_jmp_to_tail(map)->tail.next;
8080         kfree(map);
8081  out:
8082         mutex_unlock(&trace_eval_mutex);
8083 }
8084 #else
8085 static inline void trace_module_remove_evals(struct module *mod) { }
8086 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8087
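/*
 * Module notifier: add a module's eval maps while it is coming up and,
 * when CONFIG_TRACE_EVAL_MAP_FILE is enabled, remove them again when
 * the module is going away.
 */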
8088 static int trace_module_notify(struct notifier_block *self,
8089                                unsigned long val, void *data)
8090 {
8091         struct module *mod = data;
8092
8093         switch (val) {
8094         case MODULE_STATE_COMING:
8095                 trace_module_add_evals(mod);
8096                 break;
8097         case MODULE_STATE_GOING:
8098                 trace_module_remove_evals(mod);
8099                 break;
8100         }
8101
8102         return 0;
8103 }
8104
8105 static struct notifier_block trace_module_nb = {
8106         .notifier_call = trace_module_notify,
8107         .priority = 0,
8108 };
8109 #endif /* CONFIG_MODULES */
8110
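/*
 * fs_initcall (registered at the bottom of this file) that creates the
 * top-level tracefs files for the global trace array. With the default
 * tracefs mount point, the result is roughly:
 *
 *   ls /sys/kernel/tracing
 *   available_tracers  current_tracer  trace  trace_pipe  tracing_on  ...
 */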
8111 static __init int tracer_init_tracefs(void)
8112 {
8113         struct dentry *d_tracer;
8114
8115         trace_access_lock_init();
8116
8117         d_tracer = tracing_init_dentry();
8118         if (IS_ERR(d_tracer))
8119                 return 0;
8120
8121         init_tracer_tracefs(&global_trace, d_tracer);
8122         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8123
8124         trace_create_file("tracing_thresh", 0644, d_tracer,
8125                         &global_trace, &tracing_thresh_fops);
8126
8127         trace_create_file("README", 0444, d_tracer,
8128                         NULL, &tracing_readme_fops);
8129
8130         trace_create_file("saved_cmdlines", 0444, d_tracer,
8131                         NULL, &tracing_saved_cmdlines_fops);
8132
8133         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8134                           NULL, &tracing_saved_cmdlines_size_fops);
8135
8136         trace_create_file("saved_tgids", 0444, d_tracer,
8137                         NULL, &tracing_saved_tgids_fops);
8138
8139         trace_eval_init();
8140
8141         trace_create_eval_file(d_tracer);
8142
8143 #ifdef CONFIG_MODULES
8144         register_module_notifier(&trace_module_nb);
8145 #endif
8146
8147 #ifdef CONFIG_DYNAMIC_FTRACE
8148         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8149                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8150 #endif
8151
8152         create_trace_instances(d_tracer);
8153
8154         update_tracer_options(&global_trace);
8155
8156         return 0;
8157 }
8158
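/*
 * Panic and die notifiers: when ftrace_dump_on_oops is set, dump the
 * ftrace ring buffer to the console on an oops or panic. It can be set
 * with the ftrace_dump_on_oops kernel command line option or, at run
 * time, through the kernel.ftrace_dump_on_oops sysctl.
 */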
8159 static int trace_panic_handler(struct notifier_block *this,
8160                                unsigned long event, void *unused)
8161 {
8162         if (ftrace_dump_on_oops)
8163                 ftrace_dump(ftrace_dump_on_oops);
8164         return NOTIFY_OK;
8165 }
8166
8167 static struct notifier_block trace_panic_notifier = {
8168         .notifier_call  = trace_panic_handler,
8169         .next           = NULL,
8170         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8171 };
8172
8173 static int trace_die_handler(struct notifier_block *self,
8174                              unsigned long val,
8175                              void *data)
8176 {
8177         switch (val) {
8178         case DIE_OOPS:
8179                 if (ftrace_dump_on_oops)
8180                         ftrace_dump(ftrace_dump_on_oops);
8181                 break;
8182         default:
8183                 break;
8184         }
8185         return NOTIFY_OK;
8186 }
8187
8188 static struct notifier_block trace_die_notifier = {
8189         .notifier_call = trace_die_handler,
8190         .priority = 200
8191 };
8192
8193 /*
8194  * printk is set to a max of 1024; we really don't need it that big.
8195  * Nothing should be printing 1000 characters anyway.
8196  */
8197 #define TRACE_MAX_PRINT         1000
8198
8199 /*
8200  * Define here KERN_TRACE so that we have one place to modify
8201  * it if we decide to change what log level the ftrace dump
8202  * should be at.
8203  */
8204 #define KERN_TRACE              KERN_EMERG
8205
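/*
 * Flush the contents of a trace_seq to the console at KERN_TRACE level,
 * clamping the length defensively, then reinitialize the trace_seq for
 * the next line. Used by ftrace_dump() below, among others.
 */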
8206 void
8207 trace_printk_seq(struct trace_seq *s)
8208 {
8209         /* Probably should print a warning here. */
8210         if (s->seq.len >= TRACE_MAX_PRINT)
8211                 s->seq.len = TRACE_MAX_PRINT;
8212
8213         /*
8214          * More paranoid code. Although the buffer size is set to
8215          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8216          * an extra layer of protection.
8217          */
8218         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8219                 s->seq.len = s->seq.size - 1;
8220
8221         /* should be NUL-terminated, but we are paranoid. */
8222         s->buffer[s->seq.len] = 0;
8223
8224         printk(KERN_TRACE "%s", s->buffer);
8225
8226         trace_seq_init(s);
8227 }
8228
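/*
 * Set up a trace_iterator over the global trace buffer, covering all
 * CPUs, without going through the usual file-open path. Used by
 * in-kernel dumpers such as ftrace_dump().
 */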
8229 void trace_init_global_iter(struct trace_iterator *iter)
8230 {
8231         iter->tr = &global_trace;
8232         iter->trace = iter->tr->current_trace;
8233         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8234         iter->trace_buffer = &global_trace.trace_buffer;
8235
8236         if (iter->trace && iter->trace->open)
8237                 iter->trace->open(iter);
8238
8239         /* Annotate start of buffers if we had overruns */
8240         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8241                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8242
8243         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8244         if (trace_clocks[iter->tr->clock_id].in_ns)
8245                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8246 }
8247
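/*
 * Dump the ftrace ring buffer(s) to the console. Called from the panic
 * and die notifiers above, and from the sysrq handler; for example,
 * assuming CONFIG_MAGIC_SYSRQ:
 *
 *   echo z > /proc/sysrq-trigger
 *
 * dumps the buffers of all CPUs.
 */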
8248 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8249 {
8250         /* use static because iter can be a bit big for the stack */
8251         static struct trace_iterator iter;
8252         static atomic_t dump_running;
8253         struct trace_array *tr = &global_trace;
8254         unsigned int old_userobj;
8255         unsigned long flags;
8256         int cnt = 0, cpu;
8257
8258         /* Only allow one dump user at a time. */
8259         if (atomic_inc_return(&dump_running) != 1) {
8260                 atomic_dec(&dump_running);
8261                 return;
8262         }
8263
8264         /*
8265          * Always turn off tracing when we dump.
8266          * We don't need to show trace output of what happens
8267          * between multiple crashes.
8268          *
8269          * If the user does a sysrq-z, then they can re-enable
8270          * tracing with echo 1 > tracing_on.
8271          */
8272         tracing_off();
8273
8274         local_irq_save(flags);
8275         printk_nmi_direct_enter();
8276
8277         /* Simulate the iterator */
8278         trace_init_global_iter(&iter);
8279
8280         for_each_tracing_cpu(cpu) {
8281                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8282         }
8283
8284         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8285
8286         /* don't look at user memory in panic mode */
8287         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8288
8289         switch (oops_dump_mode) {
8290         case DUMP_ALL:
8291                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8292                 break;
8293         case DUMP_ORIG:
8294                 iter.cpu_file = raw_smp_processor_id();
8295                 break;
8296         case DUMP_NONE:
8297                 goto out_enable;
8298         default:
8299                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8300                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8301         }
8302
8303         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8304
8305         /* Did function tracer already get disabled? */
8306         if (ftrace_is_dead()) {
8307                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8308                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8309         }
8310
8311         /*
8312          * We need to stop all tracing on all CPUs to read
8313          * the next buffer. This is a bit expensive, but is
8314          * not done often. We read all that we can,
8315          * and then release the locks again.
8316          */
8317
8318         while (!trace_empty(&iter)) {
8319
8320                 if (!cnt)
8321                         printk(KERN_TRACE "---------------------------------\n");
8322
8323                 cnt++;
8324
8325                 trace_iterator_reset(&iter);
8326                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8327
8328                 if (trace_find_next_entry_inc(&iter) != NULL) {
8329                         int ret;
8330
8331                         ret = print_trace_line(&iter);
8332                         if (ret != TRACE_TYPE_NO_CONSUME)
8333                                 trace_consume(&iter);
8334                 }
8335                 touch_nmi_watchdog();
8336
8337                 trace_printk_seq(&iter.seq);
8338         }
8339
8340         if (!cnt)
8341                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8342         else
8343                 printk(KERN_TRACE "---------------------------------\n");
8344
8345  out_enable:
8346         tr->trace_flags |= old_userobj;
8347
8348         for_each_tracing_cpu(cpu) {
8349                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8350         }
8351         atomic_dec(&dump_running);
8352         printk_nmi_direct_exit();
8353         local_irq_restore(flags);
8354 }
8355 EXPORT_SYMBOL_GPL(ftrace_dump);
8356
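/*
 * Early boot setup of the tracing core: allocate the CPU masks and the
 * (initially minimal) ring buffers, install the nop tracer as the
 * current tracer, and register the panic/die notifiers. Called from
 * early_trace_init() below.
 */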
8357 __init static int tracer_alloc_buffers(void)
8358 {
8359         int ring_buf_size;
8360         int ret = -ENOMEM;
8361
8362         /*
8363          * Make sure we don't accidentally add more trace options
8364          * than we have bits for.
8365          */
8366         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8367
8368         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8369                 goto out;
8370
8371         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8372                 goto out_free_buffer_mask;
8373
8374         /* Only allocate trace_printk buffers if a trace_printk exists */
8375         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8376                 /* Must be called before global_trace.buffer is allocated */
8377                 trace_printk_init_buffers();
8378
8379         /* To save memory, keep the ring buffer size at its minimum */
8380         if (ring_buffer_expanded)
8381                 ring_buf_size = trace_buf_size;
8382         else
8383                 ring_buf_size = 1;
8384
8385         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8386         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8387
8388         raw_spin_lock_init(&global_trace.start_lock);
8389
8390         /*
8391          * The prepare callback allocates some memory for the ring buffer. We
8392          * don't free the buffer if the CPU goes down. If we were to free
8393          * the buffer, then the user would lose any trace that was in the
8394          * buffer. The memory will be removed once the "instance" is removed.
8395          */
8396         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8397                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8398                                       NULL);
8399         if (ret < 0)
8400                 goto out_free_cpumask;
8401         /* Used for event triggers */
8402         ret = -ENOMEM;
8403         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8404         if (!temp_buffer)
8405                 goto out_rm_hp_state;
8406
8407         if (trace_create_savedcmd() < 0)
8408                 goto out_free_temp_buffer;
8409
8410         /* TODO: make the number of buffers hot pluggable with CPUs */
8411         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8412                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8413                 WARN_ON(1);
8414                 goto out_free_savedcmd;
8415         }
8416
8417         if (global_trace.buffer_disabled)
8418                 tracing_off();
8419
8420         if (trace_boot_clock) {
8421                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8422                 if (ret < 0)
8423                         pr_warn("Trace clock %s not defined, going back to default\n",
8424                                 trace_boot_clock);
8425         }
8426
8427         /*
8428          * register_tracer() might reference current_trace, so it
8429          * needs to be set before we register anything. This is
8430          * just a bootstrap of current_trace anyway.
8431          */
8432         global_trace.current_trace = &nop_trace;
8433
8434         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8435
8436         ftrace_init_global_array_ops(&global_trace);
8437
8438         init_trace_flags_index(&global_trace);
8439
8440         register_tracer(&nop_trace);
8441
8442         /* Function tracing may start here (via kernel command line) */
8443         init_function_trace();
8444
8445         /* All seems OK, enable tracing */
8446         tracing_disabled = 0;
8447
8448         atomic_notifier_chain_register(&panic_notifier_list,
8449                                        &trace_panic_notifier);
8450
8451         register_die_notifier(&trace_die_notifier);
8452
8453         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8454
8455         INIT_LIST_HEAD(&global_trace.systems);
8456         INIT_LIST_HEAD(&global_trace.events);
8457         list_add(&global_trace.list, &ftrace_trace_arrays);
8458
8459         apply_trace_boot_options();
8460
8461         register_snapshot_cmd();
8462
8463         return 0;
8464
8465 out_free_savedcmd:
8466         free_saved_cmdlines_buffer(savedcmd);
8467 out_free_temp_buffer:
8468         ring_buffer_free(temp_buffer);
8469 out_rm_hp_state:
8470         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8471 out_free_cpumask:
8472         free_cpumask_var(global_trace.tracing_cpumask);
8473 out_free_buffer_mask:
8474         free_cpumask_var(tracing_buffer_mask);
8475 out:
8476         return ret;
8477 }
8478
8479 void __init early_trace_init(void)
8480 {
8481         if (tracepoint_printk) {
8482                 tracepoint_print_iter =
8483                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8484                 if (WARN_ON(!tracepoint_print_iter))
8485                         tracepoint_printk = 0;
8486                 else
8487                         static_key_enable(&tracepoint_printk_key.key);
8488         }
8489         tracer_alloc_buffers();
8490
8491         init_events();
8492 }
8493
8494 void __init trace_init(void)
8495 {
8496         trace_event_init();
8497 }
8498
8499 __init static int clear_boot_tracer(void)
8500 {
8501         /*
8502          * The default bootup tracer name points into an init section
8503          * that is freed after boot. This function runs at late_initcall
8504          * time; if the boot tracer was never found and registered,
8505          * clear the pointer so that a later registration does not
8506          * access the init memory that is about to be freed.
8507          */
8508         if (!default_bootup_tracer)
8509                 return 0;
8510
8511         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8512                default_bootup_tracer);
8513         default_bootup_tracer = NULL;
8514
8515         return 0;
8516 }
8517
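/*
 * Boot ordering: early_trace_init() and trace_init() are called directly
 * from start_kernel(), tracer_init_tracefs() runs as an fs_initcall()
 * once tracefs is usable, and clear_boot_tracer() runs late in boot to
 * drop the pointer into soon-to-be-freed init memory.
 */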
8518 fs_initcall(tracer_init_tracefs);
8519 late_initcall_sync(clear_boot_tracer);