GNU Linux-libre 4.14.295-gnu1
[releases.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will lurk into the ring-buffer to count the
58  * entries inserted during the selftest although some concurrent
59  * insertions into the ring-buffer such as trace_printk could occurred
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputing it to a
109  * serial console.
110  *
111  * It is default off, but you can enable it with either specifying
112  * "ftrace_dump_on_oops" in the kernel command line, or setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered oops
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         /* Ignore the "tp_printk_stop_on_boot" param */
234         if (*str == '_')
235                 return 0;
236
237         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238                 tracepoint_printk = 1;
239         return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242
243 unsigned long long ns2usecs(u64 nsec)
244 {
245         nsec += 500;
246         do_div(nsec, 1000);
247         return nsec;
248 }
249
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS                                             \
252         (FUNCTION_DEFAULT_FLAGS |                                       \
253          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
254          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
255          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
256          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
260                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271         .trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273
274 LIST_HEAD(ftrace_trace_arrays);
275
276 int trace_array_get(struct trace_array *this_tr)
277 {
278         struct trace_array *tr;
279         int ret = -ENODEV;
280
281         mutex_lock(&trace_types_lock);
282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283                 if (tr == this_tr) {
284                         tr->ref++;
285                         ret = 0;
286                         break;
287                 }
288         }
289         mutex_unlock(&trace_types_lock);
290
291         return ret;
292 }
293
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296         WARN_ON(!this_tr->ref);
297         this_tr->ref--;
298 }
299
300 void trace_array_put(struct trace_array *this_tr)
301 {
302         mutex_lock(&trace_types_lock);
303         __trace_array_put(this_tr);
304         mutex_unlock(&trace_types_lock);
305 }
306
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308                               struct ring_buffer *buffer,
309                               struct ring_buffer_event *event)
310 {
311         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312             !filter_match_preds(call->filter, rec)) {
313                 __trace_event_discard_commit(buffer, event);
314                 return 1;
315         }
316
317         return 0;
318 }
319
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322         vfree(pid_list->pids);
323         kfree(pid_list);
324 }
325
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
331  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336         /*
337          * If pid_max changed after filtered_pids was created, we
338          * by default ignore all pids greater than the previous pid_max.
339          */
340         if (search_pid >= filtered_pids->pid_max)
341                 return false;
342
343         return test_bit(search_pid, filtered_pids->pids);
344 }
345
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358         /*
359          * Return false, because if filtered_pids does not exist,
360          * all pids are good to trace.
361          */
362         if (!filtered_pids)
363                 return false;
364
365         return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367
368 /**
369  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * If adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381                                   struct task_struct *self,
382                                   struct task_struct *task)
383 {
384         if (!pid_list)
385                 return;
386
387         /* For forks, we only add if the forking task is listed */
388         if (self) {
389                 if (!trace_find_filtered_pid(pid_list, self->pid))
390                         return;
391         }
392
393         /* Sorry, but we don't support pid_max changing after setting */
394         if (task->pid >= pid_list->pid_max)
395                 return;
396
397         /* "self" is set for forks, and NULL for exits */
398         if (self)
399                 set_bit(task->pid, pid_list->pids);
400         else
401                 clear_bit(task->pid, pid_list->pids);
402 }
403
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
407  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418         unsigned long pid = (unsigned long)v;
419
420         (*pos)++;
421
422         /* pid already is +1 of the actual prevous bit */
423         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424
425         /* Return pid + 1 to allow zero to be represented */
426         if (pid < pid_list->pid_max)
427                 return (void *)(pid + 1);
428
429         return NULL;
430 }
431
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445         unsigned long pid;
446         loff_t l = 0;
447
448         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449         if (pid >= pid_list->pid_max)
450                 return NULL;
451
452         /* Return pid + 1 so that zero can be the exit value */
453         for (pid++; pid && l < *pos;
454              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455                 ;
456         return (void *)pid;
457 }
458
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469         unsigned long pid = (unsigned long)v - 1;
470
471         seq_printf(m, "%lu\n", pid);
472         return 0;
473 }
474
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE            127
477
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479                     struct trace_pid_list **new_pid_list,
480                     const char __user *ubuf, size_t cnt)
481 {
482         struct trace_pid_list *pid_list;
483         struct trace_parser parser;
484         unsigned long val;
485         int nr_pids = 0;
486         ssize_t read = 0;
487         ssize_t ret = 0;
488         loff_t pos;
489         pid_t pid;
490
491         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492                 return -ENOMEM;
493
494         /*
495          * Always recreate a new array. The write is an all or nothing
496          * operation. Always create a new array when adding new pids by
497          * the user. If the operation fails, then the current list is
498          * not modified.
499          */
500         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501         if (!pid_list) {
502                 trace_parser_put(&parser);
503                 return -ENOMEM;
504         }
505
506         pid_list->pid_max = READ_ONCE(pid_max);
507
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 trace_parser_put(&parser);
515                 kfree(pid_list);
516                 return -ENOMEM;
517         }
518
519         if (filtered_pids) {
520                 /* copy the current bits to the new max */
521                 for_each_set_bit(pid, filtered_pids->pids,
522                                  filtered_pids->pid_max) {
523                         set_bit(pid, pid_list->pids);
524                         nr_pids++;
525                 }
526         }
527
528         while (cnt > 0) {
529
530                 pos = 0;
531
532                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
533                 if (ret < 0 || !trace_parser_loaded(&parser))
534                         break;
535
536                 read += ret;
537                 ubuf += ret;
538                 cnt -= ret;
539
540                 parser.buffer[parser.idx] = 0;
541
542                 ret = -EINVAL;
543                 if (kstrtoul(parser.buffer, 0, &val))
544                         break;
545                 if (val >= pid_list->pid_max)
546                         break;
547
548                 pid = (pid_t)val;
549
550                 set_bit(pid, pid_list->pids);
551                 nr_pids++;
552
553                 trace_parser_clear(&parser);
554                 ret = 0;
555         }
556         trace_parser_put(&parser);
557
558         if (ret < 0) {
559                 trace_free_pid_list(pid_list);
560                 return ret;
561         }
562
563         if (!nr_pids) {
564                 /* Cleared the list of pids */
565                 trace_free_pid_list(pid_list);
566                 read = ret;
567                 pid_list = NULL;
568         }
569
570         *new_pid_list = pid_list;
571
572         return read;
573 }
574
575 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
576 {
577         u64 ts;
578
579         /* Early boot up does not have a buffer yet */
580         if (!buf->buffer)
581                 return trace_clock_local();
582
583         ts = ring_buffer_time_stamp(buf->buffer, cpu);
584         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
585
586         return ts;
587 }
588
589 u64 ftrace_now(int cpu)
590 {
591         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
592 }
593
594 /**
595  * tracing_is_enabled - Show if global_trace has been disabled
596  *
597  * Shows if the global trace has been enabled or not. It uses the
598  * mirror flag "buffer_disabled" to be used in fast paths such as for
599  * the irqsoff tracer. But it may be inaccurate due to races. If you
600  * need to know the accurate state, use tracing_is_on() which is a little
601  * slower, but accurate.
602  */
603 int tracing_is_enabled(void)
604 {
605         /*
606          * For quick access (irqsoff uses this in fast path), just
607          * return the mirror variable of the state of the ring buffer.
608          * It's a little racy, but we don't really care.
609          */
610         smp_rmb();
611         return !global_trace.buffer_disabled;
612 }
613
614 /*
615  * trace_buf_size is the size in bytes that is allocated
616  * for a buffer. Note, the number of bytes is always rounded
617  * to page size.
618  *
619  * This number is purposely set to a low number of 16384.
620  * If the dump on oops happens, it will be much appreciated
621  * to not have to wait for all that output. Anyway this can be
622  * boot time and run time configurable.
623  */
624 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
625
626 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
627
628 /* trace_types holds a link list of available tracers. */
629 static struct tracer            *trace_types __read_mostly;
630
631 /*
632  * trace_types_lock is used to protect the trace_types list.
633  */
634 DEFINE_MUTEX(trace_types_lock);
635
636 /*
637  * serialize the access of the ring buffer
638  *
639  * ring buffer serializes readers, but it is low level protection.
640  * The validity of the events (which returns by ring_buffer_peek() ..etc)
641  * are not protected by ring buffer.
642  *
643  * The content of events may become garbage if we allow other process consumes
644  * these events concurrently:
645  *   A) the page of the consumed events may become a normal page
646  *      (not reader page) in ring buffer, and this page will be rewrited
647  *      by events producer.
648  *   B) The page of the consumed events may become a page for splice_read,
649  *      and this page will be returned to system.
650  *
651  * These primitives allow multi process access to different cpu ring buffer
652  * concurrently.
653  *
654  * These primitives don't distinguish read-only and read-consume access.
655  * Multi read-only access are also serialized.
656  */
657
658 #ifdef CONFIG_SMP
659 static DECLARE_RWSEM(all_cpu_access_lock);
660 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
661
662 static inline void trace_access_lock(int cpu)
663 {
664         if (cpu == RING_BUFFER_ALL_CPUS) {
665                 /* gain it for accessing the whole ring buffer. */
666                 down_write(&all_cpu_access_lock);
667         } else {
668                 /* gain it for accessing a cpu ring buffer. */
669
670                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
671                 down_read(&all_cpu_access_lock);
672
673                 /* Secondly block other access to this @cpu ring buffer. */
674                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
675         }
676 }
677
678 static inline void trace_access_unlock(int cpu)
679 {
680         if (cpu == RING_BUFFER_ALL_CPUS) {
681                 up_write(&all_cpu_access_lock);
682         } else {
683                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
684                 up_read(&all_cpu_access_lock);
685         }
686 }
687
688 static inline void trace_access_lock_init(void)
689 {
690         int cpu;
691
692         for_each_possible_cpu(cpu)
693                 mutex_init(&per_cpu(cpu_access_lock, cpu));
694 }
695
696 #else
697
698 static DEFINE_MUTEX(access_lock);
699
700 static inline void trace_access_lock(int cpu)
701 {
702         (void)cpu;
703         mutex_lock(&access_lock);
704 }
705
706 static inline void trace_access_unlock(int cpu)
707 {
708         (void)cpu;
709         mutex_unlock(&access_lock);
710 }
711
712 static inline void trace_access_lock_init(void)
713 {
714 }
715
716 #endif
717
718 #ifdef CONFIG_STACKTRACE
719 static void __ftrace_trace_stack(struct ring_buffer *buffer,
720                                  unsigned long flags,
721                                  int skip, int pc, struct pt_regs *regs);
722 static inline void ftrace_trace_stack(struct trace_array *tr,
723                                       struct ring_buffer *buffer,
724                                       unsigned long flags,
725                                       int skip, int pc, struct pt_regs *regs);
726
727 #else
728 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
729                                         unsigned long flags,
730                                         int skip, int pc, struct pt_regs *regs)
731 {
732 }
733 static inline void ftrace_trace_stack(struct trace_array *tr,
734                                       struct ring_buffer *buffer,
735                                       unsigned long flags,
736                                       int skip, int pc, struct pt_regs *regs)
737 {
738 }
739
740 #endif
741
742 static __always_inline void
743 trace_event_setup(struct ring_buffer_event *event,
744                   int type, unsigned long flags, int pc)
745 {
746         struct trace_entry *ent = ring_buffer_event_data(event);
747
748         tracing_generic_entry_update(ent, flags, pc);
749         ent->type = type;
750 }
751
752 static __always_inline struct ring_buffer_event *
753 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
754                           int type,
755                           unsigned long len,
756                           unsigned long flags, int pc)
757 {
758         struct ring_buffer_event *event;
759
760         event = ring_buffer_lock_reserve(buffer, len);
761         if (event != NULL)
762                 trace_event_setup(event, type, flags, pc);
763
764         return event;
765 }
766
767 void tracer_tracing_on(struct trace_array *tr)
768 {
769         if (tr->trace_buffer.buffer)
770                 ring_buffer_record_on(tr->trace_buffer.buffer);
771         /*
772          * This flag is looked at when buffers haven't been allocated
773          * yet, or by some tracers (like irqsoff), that just want to
774          * know if the ring buffer has been disabled, but it can handle
775          * races of where it gets disabled but we still do a record.
776          * As the check is in the fast path of the tracers, it is more
777          * important to be fast than accurate.
778          */
779         tr->buffer_disabled = 0;
780         /* Make the flag seen by readers */
781         smp_wmb();
782 }
783
784 /**
785  * tracing_on - enable tracing buffers
786  *
787  * This function enables tracing buffers that may have been
788  * disabled with tracing_off.
789  */
790 void tracing_on(void)
791 {
792         tracer_tracing_on(&global_trace);
793 }
794 EXPORT_SYMBOL_GPL(tracing_on);
795
796
797 static __always_inline void
798 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
799 {
800         __this_cpu_write(trace_taskinfo_save, true);
801
802         /* If this is the temp buffer, we need to commit fully */
803         if (this_cpu_read(trace_buffered_event) == event) {
804                 /* Length is in event->array[0] */
805                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
806                 /* Release the temp buffer */
807                 this_cpu_dec(trace_buffered_event_cnt);
808         } else
809                 ring_buffer_unlock_commit(buffer, event);
810 }
811
812 /**
813  * __trace_puts - write a constant string into the trace buffer.
814  * @ip:    The address of the caller
815  * @str:   The constant string to write
816  * @size:  The size of the string.
817  */
818 int __trace_puts(unsigned long ip, const char *str, int size)
819 {
820         struct ring_buffer_event *event;
821         struct ring_buffer *buffer;
822         struct print_entry *entry;
823         unsigned long irq_flags;
824         int alloc;
825         int pc;
826
827         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
828                 return 0;
829
830         pc = preempt_count();
831
832         if (unlikely(tracing_selftest_running || tracing_disabled))
833                 return 0;
834
835         alloc = sizeof(*entry) + size + 2; /* possible \n added */
836
837         local_save_flags(irq_flags);
838         buffer = global_trace.trace_buffer.buffer;
839         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
840                                             irq_flags, pc);
841         if (!event)
842                 return 0;
843
844         entry = ring_buffer_event_data(event);
845         entry->ip = ip;
846
847         memcpy(&entry->buf, str, size);
848
849         /* Add a newline if necessary */
850         if (entry->buf[size - 1] != '\n') {
851                 entry->buf[size] = '\n';
852                 entry->buf[size + 1] = '\0';
853         } else
854                 entry->buf[size] = '\0';
855
856         __buffer_unlock_commit(buffer, event);
857         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
858
859         return size;
860 }
861 EXPORT_SYMBOL_GPL(__trace_puts);
862
863 /**
864  * __trace_bputs - write the pointer to a constant string into trace buffer
865  * @ip:    The address of the caller
866  * @str:   The constant string to write to the buffer to
867  */
868 int __trace_bputs(unsigned long ip, const char *str)
869 {
870         struct ring_buffer_event *event;
871         struct ring_buffer *buffer;
872         struct bputs_entry *entry;
873         unsigned long irq_flags;
874         int size = sizeof(struct bputs_entry);
875         int pc;
876
877         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
878                 return 0;
879
880         pc = preempt_count();
881
882         if (unlikely(tracing_selftest_running || tracing_disabled))
883                 return 0;
884
885         local_save_flags(irq_flags);
886         buffer = global_trace.trace_buffer.buffer;
887         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
888                                             irq_flags, pc);
889         if (!event)
890                 return 0;
891
892         entry = ring_buffer_event_data(event);
893         entry->ip                       = ip;
894         entry->str                      = str;
895
896         __buffer_unlock_commit(buffer, event);
897         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898
899         return 1;
900 }
901 EXPORT_SYMBOL_GPL(__trace_bputs);
902
903 #ifdef CONFIG_TRACER_SNAPSHOT
904 void tracing_snapshot_instance(struct trace_array *tr)
905 {
906         struct tracer *tracer = tr->current_trace;
907         unsigned long flags;
908
909         if (in_nmi()) {
910                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
911                 internal_trace_puts("*** snapshot is being ignored        ***\n");
912                 return;
913         }
914
915         if (!tr->allocated_snapshot) {
916                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
917                 internal_trace_puts("*** stopping trace here!   ***\n");
918                 tracing_off();
919                 return;
920         }
921
922         /* Note, snapshot can not be used when the tracer uses it */
923         if (tracer->use_max_tr) {
924                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
925                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
926                 return;
927         }
928
929         local_irq_save(flags);
930         update_max_tr(tr, current, smp_processor_id());
931         local_irq_restore(flags);
932 }
933
934 /**
935  * trace_snapshot - take a snapshot of the current buffer.
936  *
937  * This causes a swap between the snapshot buffer and the current live
938  * tracing buffer. You can use this to take snapshots of the live
939  * trace when some condition is triggered, but continue to trace.
940  *
941  * Note, make sure to allocate the snapshot with either
942  * a tracing_snapshot_alloc(), or by doing it manually
943  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
944  *
945  * If the snapshot buffer is not allocated, it will stop tracing.
946  * Basically making a permanent snapshot.
947  */
948 void tracing_snapshot(void)
949 {
950         struct trace_array *tr = &global_trace;
951
952         tracing_snapshot_instance(tr);
953 }
954 EXPORT_SYMBOL_GPL(tracing_snapshot);
955
956 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
957                                         struct trace_buffer *size_buf, int cpu_id);
958 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
959
960 int tracing_alloc_snapshot_instance(struct trace_array *tr)
961 {
962         int ret;
963
964         if (!tr->allocated_snapshot) {
965
966                 /* allocate spare buffer */
967                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
968                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
969                 if (ret < 0)
970                         return ret;
971
972                 tr->allocated_snapshot = true;
973         }
974
975         return 0;
976 }
977
978 static void free_snapshot(struct trace_array *tr)
979 {
980         /*
981          * We don't free the ring buffer. instead, resize it because
982          * The max_tr ring buffer has some state (e.g. ring->clock) and
983          * we want preserve it.
984          */
985         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
986         set_buffer_entries(&tr->max_buffer, 1);
987         tracing_reset_online_cpus(&tr->max_buffer);
988         tr->allocated_snapshot = false;
989 }
990
991 /**
992  * tracing_alloc_snapshot - allocate snapshot buffer.
993  *
994  * This only allocates the snapshot buffer if it isn't already
995  * allocated - it doesn't also take a snapshot.
996  *
997  * This is meant to be used in cases where the snapshot buffer needs
998  * to be set up for events that can't sleep but need to be able to
999  * trigger a snapshot.
1000  */
1001 int tracing_alloc_snapshot(void)
1002 {
1003         struct trace_array *tr = &global_trace;
1004         int ret;
1005
1006         ret = tracing_alloc_snapshot_instance(tr);
1007         WARN_ON(ret < 0);
1008
1009         return ret;
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1012
1013 /**
1014  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1015  *
1016  * This is similar to trace_snapshot(), but it will allocate the
1017  * snapshot buffer if it isn't already allocated. Use this only
1018  * where it is safe to sleep, as the allocation may sleep.
1019  *
1020  * This causes a swap between the snapshot buffer and the current live
1021  * tracing buffer. You can use this to take snapshots of the live
1022  * trace when some condition is triggered, but continue to trace.
1023  */
1024 void tracing_snapshot_alloc(void)
1025 {
1026         int ret;
1027
1028         ret = tracing_alloc_snapshot();
1029         if (ret < 0)
1030                 return;
1031
1032         tracing_snapshot();
1033 }
1034 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1035 #else
1036 void tracing_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_snapshot);
1041 int tracing_alloc_snapshot(void)
1042 {
1043         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1044         return -ENODEV;
1045 }
1046 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1047 void tracing_snapshot_alloc(void)
1048 {
1049         /* Give warning */
1050         tracing_snapshot();
1051 }
1052 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1053 #endif /* CONFIG_TRACER_SNAPSHOT */
1054
1055 void tracer_tracing_off(struct trace_array *tr)
1056 {
1057         if (tr->trace_buffer.buffer)
1058                 ring_buffer_record_off(tr->trace_buffer.buffer);
1059         /*
1060          * This flag is looked at when buffers haven't been allocated
1061          * yet, or by some tracers (like irqsoff), that just want to
1062          * know if the ring buffer has been disabled, but it can handle
1063          * races of where it gets disabled but we still do a record.
1064          * As the check is in the fast path of the tracers, it is more
1065          * important to be fast than accurate.
1066          */
1067         tr->buffer_disabled = 1;
1068         /* Make the flag seen by readers */
1069         smp_wmb();
1070 }
1071
1072 /**
1073  * tracing_off - turn off tracing buffers
1074  *
1075  * This function stops the tracing buffers from recording data.
1076  * It does not disable any overhead the tracers themselves may
1077  * be causing. This function simply causes all recording to
1078  * the ring buffers to fail.
1079  */
1080 void tracing_off(void)
1081 {
1082         tracer_tracing_off(&global_trace);
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_off);
1085
1086 void disable_trace_on_warning(void)
1087 {
1088         if (__disable_trace_on_warning)
1089                 tracing_off();
1090 }
1091
1092 /**
1093  * tracer_tracing_is_on - show real state of ring buffer enabled
1094  * @tr : the trace array to know if ring buffer is enabled
1095  *
1096  * Shows real state of the ring buffer if it is enabled or not.
1097  */
1098 int tracer_tracing_is_on(struct trace_array *tr)
1099 {
1100         if (tr->trace_buffer.buffer)
1101                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1102         return !tr->buffer_disabled;
1103 }
1104
1105 /**
1106  * tracing_is_on - show state of ring buffers enabled
1107  */
1108 int tracing_is_on(void)
1109 {
1110         return tracer_tracing_is_on(&global_trace);
1111 }
1112 EXPORT_SYMBOL_GPL(tracing_is_on);
1113
1114 static int __init set_buf_size(char *str)
1115 {
1116         unsigned long buf_size;
1117
1118         if (!str)
1119                 return 0;
1120         buf_size = memparse(str, &str);
1121         /*
1122          * nr_entries can not be zero and the startup
1123          * tests require some buffer space. Therefore
1124          * ensure we have at least 4096 bytes of buffer.
1125          */
1126         trace_buf_size = max(4096UL, buf_size);
1127         return 1;
1128 }
1129 __setup("trace_buf_size=", set_buf_size);
1130
1131 static int __init set_tracing_thresh(char *str)
1132 {
1133         unsigned long threshold;
1134         int ret;
1135
1136         if (!str)
1137                 return 0;
1138         ret = kstrtoul(str, 0, &threshold);
1139         if (ret < 0)
1140                 return 0;
1141         tracing_thresh = threshold * 1000;
1142         return 1;
1143 }
1144 __setup("tracing_thresh=", set_tracing_thresh);
1145
1146 unsigned long nsecs_to_usecs(unsigned long nsecs)
1147 {
1148         return nsecs / 1000;
1149 }
1150
1151 /*
1152  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1153  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1154  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1155  * of strings in the order that the evals (enum) were defined.
1156  */
1157 #undef C
1158 #define C(a, b) b
1159
1160 /* These must match the bit postions in trace_iterator_flags */
1161 static const char *trace_options[] = {
1162         TRACE_FLAGS
1163         NULL
1164 };
1165
1166 static struct {
1167         u64 (*func)(void);
1168         const char *name;
1169         int in_ns;              /* is this clock in nanoseconds? */
1170 } trace_clocks[] = {
1171         { trace_clock_local,            "local",        1 },
1172         { trace_clock_global,           "global",       1 },
1173         { trace_clock_counter,          "counter",      0 },
1174         { trace_clock_jiffies,          "uptime",       0 },
1175         { trace_clock,                  "perf",         1 },
1176         { ktime_get_mono_fast_ns,       "mono",         1 },
1177         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1178         { ktime_get_boot_fast_ns,       "boot",         1 },
1179         ARCH_TRACE_CLOCKS
1180 };
1181
1182 /*
1183  * trace_parser_get_init - gets the buffer for trace parser
1184  */
1185 int trace_parser_get_init(struct trace_parser *parser, int size)
1186 {
1187         memset(parser, 0, sizeof(*parser));
1188
1189         parser->buffer = kmalloc(size, GFP_KERNEL);
1190         if (!parser->buffer)
1191                 return 1;
1192
1193         parser->size = size;
1194         return 0;
1195 }
1196
1197 /*
1198  * trace_parser_put - frees the buffer for trace parser
1199  */
1200 void trace_parser_put(struct trace_parser *parser)
1201 {
1202         kfree(parser->buffer);
1203         parser->buffer = NULL;
1204 }
1205
1206 /*
1207  * trace_get_user - reads the user input string separated by  space
1208  * (matched by isspace(ch))
1209  *
1210  * For each string found the 'struct trace_parser' is updated,
1211  * and the function returns.
1212  *
1213  * Returns number of bytes read.
1214  *
1215  * See kernel/trace/trace.h for 'struct trace_parser' details.
1216  */
1217 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1218         size_t cnt, loff_t *ppos)
1219 {
1220         char ch;
1221         size_t read = 0;
1222         ssize_t ret;
1223
1224         if (!*ppos)
1225                 trace_parser_clear(parser);
1226
1227         ret = get_user(ch, ubuf++);
1228         if (ret)
1229                 goto out;
1230
1231         read++;
1232         cnt--;
1233
1234         /*
1235          * The parser is not finished with the last write,
1236          * continue reading the user input without skipping spaces.
1237          */
1238         if (!parser->cont) {
1239                 /* skip white space */
1240                 while (cnt && isspace(ch)) {
1241                         ret = get_user(ch, ubuf++);
1242                         if (ret)
1243                                 goto out;
1244                         read++;
1245                         cnt--;
1246                 }
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch)) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254
1255                 parser->idx = 0;
1256         }
1257
1258         /* read the non-space input */
1259         while (cnt && !isspace(ch)) {
1260                 if (parser->idx < parser->size - 1)
1261                         parser->buffer[parser->idx++] = ch;
1262                 else {
1263                         ret = -EINVAL;
1264                         goto out;
1265                 }
1266                 ret = get_user(ch, ubuf++);
1267                 if (ret)
1268                         goto out;
1269                 read++;
1270                 cnt--;
1271         }
1272
1273         /* We either got finished input or we have to wait for another call. */
1274         if (isspace(ch)) {
1275                 parser->buffer[parser->idx] = 0;
1276                 parser->cont = false;
1277         } else if (parser->idx < parser->size - 1) {
1278                 parser->cont = true;
1279                 parser->buffer[parser->idx++] = ch;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this tasks comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         struct ring_buffer *buf;
1364
1365         if (tr->stop_count)
1366                 return;
1367
1368         WARN_ON_ONCE(!irqs_disabled());
1369
1370         if (!tr->allocated_snapshot) {
1371                 /* Only the nop tracer should hit this when disabling */
1372                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373                 return;
1374         }
1375
1376         arch_spin_lock(&tr->max_lock);
1377
1378         /* Inherit the recordable setting from trace_buffer */
1379         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1380                 ring_buffer_record_on(tr->max_buffer.buffer);
1381         else
1382                 ring_buffer_record_off(tr->max_buffer.buffer);
1383
1384         buf = tr->trace_buffer.buffer;
1385         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1386         tr->max_buffer.buffer = buf;
1387
1388         __update_max_tr(tr, tsk, cpu);
1389         arch_spin_unlock(&tr->max_lock);
1390 }
1391
1392 /**
1393  * update_max_tr_single - only copy one trace over, and reset the rest
1394  * @tr - tracer
1395  * @tsk - task with the latency
1396  * @cpu - the cpu of the buffer to copy.
1397  *
1398  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1399  */
1400 void
1401 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1402 {
1403         int ret;
1404
1405         if (tr->stop_count)
1406                 return;
1407
1408         WARN_ON_ONCE(!irqs_disabled());
1409         if (!tr->allocated_snapshot) {
1410                 /* Only the nop tracer should hit this when disabling */
1411                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1412                 return;
1413         }
1414
1415         arch_spin_lock(&tr->max_lock);
1416
1417         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1418
1419         if (ret == -EBUSY) {
1420                 /*
1421                  * We failed to swap the buffer due to a commit taking
1422                  * place on this CPU. We fail to record, but we reset
1423                  * the max trace buffer (no one writes directly to it)
1424                  * and flag that it failed.
1425                  */
1426                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1427                         "Failed to swap buffers due to commit in progress\n");
1428         }
1429
1430         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1431
1432         __update_max_tr(tr, tsk, cpu);
1433         arch_spin_unlock(&tr->max_lock);
1434 }
1435 #endif /* CONFIG_TRACER_MAX_TRACE */
1436
1437 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1438 {
1439         /* Iterators are static, they should be filled or empty */
1440         if (trace_buffer_iter(iter, iter->cpu_file))
1441                 return 0;
1442
1443         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1444                                 full);
1445 }
1446
1447 #ifdef CONFIG_FTRACE_STARTUP_TEST
1448 static bool selftests_can_run;
1449
1450 struct trace_selftests {
1451         struct list_head                list;
1452         struct tracer                   *type;
1453 };
1454
1455 static LIST_HEAD(postponed_selftests);
1456
1457 static int save_selftest(struct tracer *type)
1458 {
1459         struct trace_selftests *selftest;
1460
1461         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1462         if (!selftest)
1463                 return -ENOMEM;
1464
1465         selftest->type = type;
1466         list_add(&selftest->list, &postponed_selftests);
1467         return 0;
1468 }
1469
1470 static int run_tracer_selftest(struct tracer *type)
1471 {
1472         struct trace_array *tr = &global_trace;
1473         struct tracer *saved_tracer = tr->current_trace;
1474         int ret;
1475
1476         if (!type->selftest || tracing_selftest_disabled)
1477                 return 0;
1478
1479         /*
1480          * If a tracer registers early in boot up (before scheduling is
1481          * initialized and such), then do not run its selftests yet.
1482          * Instead, run it a little later in the boot process.
1483          */
1484         if (!selftests_can_run)
1485                 return save_selftest(type);
1486
1487         /*
1488          * Run a selftest on this tracer.
1489          * Here we reset the trace buffer, and set the current
1490          * tracer to be this tracer. The tracer can then run some
1491          * internal tracing to verify that everything is in order.
1492          * If we fail, we do not register this tracer.
1493          */
1494         tracing_reset_online_cpus(&tr->trace_buffer);
1495
1496         tr->current_trace = type;
1497
1498 #ifdef CONFIG_TRACER_MAX_TRACE
1499         if (type->use_max_tr) {
1500                 /* If we expanded the buffers, make sure the max is expanded too */
1501                 if (ring_buffer_expanded)
1502                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1503                                            RING_BUFFER_ALL_CPUS);
1504                 tr->allocated_snapshot = true;
1505         }
1506 #endif
1507
1508         /* the test is responsible for initializing and enabling */
1509         pr_info("Testing tracer %s: ", type->name);
1510         ret = type->selftest(type, tr);
1511         /* the test is responsible for resetting too */
1512         tr->current_trace = saved_tracer;
1513         if (ret) {
1514                 printk(KERN_CONT "FAILED!\n");
1515                 /* Add the warning after printing 'FAILED' */
1516                 WARN_ON(1);
1517                 return -1;
1518         }
1519         /* Only reset on passing, to avoid touching corrupted buffers */
1520         tracing_reset_online_cpus(&tr->trace_buffer);
1521
1522 #ifdef CONFIG_TRACER_MAX_TRACE
1523         if (type->use_max_tr) {
1524                 tr->allocated_snapshot = false;
1525
1526                 /* Shrink the max buffer again */
1527                 if (ring_buffer_expanded)
1528                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1529                                            RING_BUFFER_ALL_CPUS);
1530         }
1531 #endif
1532
1533         printk(KERN_CONT "PASSED\n");
1534         return 0;
1535 }
1536
1537 static __init int init_trace_selftests(void)
1538 {
1539         struct trace_selftests *p, *n;
1540         struct tracer *t, **last;
1541         int ret;
1542
1543         selftests_can_run = true;
1544
1545         mutex_lock(&trace_types_lock);
1546
1547         if (list_empty(&postponed_selftests))
1548                 goto out;
1549
1550         pr_info("Running postponed tracer tests:\n");
1551
1552         tracing_selftest_running = true;
1553         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1554                 ret = run_tracer_selftest(p->type);
1555                 /* If the test fails, then warn and remove from available_tracers */
1556                 if (ret < 0) {
1557                         WARN(1, "tracer: %s failed selftest, disabling\n",
1558                              p->type->name);
1559                         last = &trace_types;
1560                         for (t = trace_types; t; t = t->next) {
1561                                 if (t == p->type) {
1562                                         *last = t->next;
1563                                         break;
1564                                 }
1565                                 last = &t->next;
1566                         }
1567                 }
1568                 list_del(&p->list);
1569                 kfree(p);
1570         }
1571         tracing_selftest_running = false;
1572
1573  out:
1574         mutex_unlock(&trace_types_lock);
1575
1576         return 0;
1577 }
1578 core_initcall(init_trace_selftests);
1579 #else
1580 static inline int run_tracer_selftest(struct tracer *type)
1581 {
1582         return 0;
1583 }
1584 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1585
1586 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1587
1588 static void __init apply_trace_boot_options(void);
1589
1590 /**
1591  * register_tracer - register a tracer with the ftrace system.
1592  * @type - the plugin for the tracer
1593  *
1594  * Register a new plugin tracer.
1595  */
1596 int __init register_tracer(struct tracer *type)
1597 {
1598         struct tracer *t;
1599         int ret = 0;
1600
1601         if (!type->name) {
1602                 pr_info("Tracer must have a name\n");
1603                 return -1;
1604         }
1605
1606         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1607                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1608                 return -1;
1609         }
1610
1611         mutex_lock(&trace_types_lock);
1612
1613         tracing_selftest_running = true;
1614
1615         for (t = trace_types; t; t = t->next) {
1616                 if (strcmp(type->name, t->name) == 0) {
1617                         /* already found */
1618                         pr_info("Tracer %s already registered\n",
1619                                 type->name);
1620                         ret = -1;
1621                         goto out;
1622                 }
1623         }
1624
1625         if (!type->set_flag)
1626                 type->set_flag = &dummy_set_flag;
1627         if (!type->flags) {
1628                 /*allocate a dummy tracer_flags*/
1629                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1630                 if (!type->flags) {
1631                         ret = -ENOMEM;
1632                         goto out;
1633                 }
1634                 type->flags->val = 0;
1635                 type->flags->opts = dummy_tracer_opt;
1636         } else
1637                 if (!type->flags->opts)
1638                         type->flags->opts = dummy_tracer_opt;
1639
1640         /* store the tracer for __set_tracer_option */
1641         type->flags->trace = type;
1642
1643         ret = run_tracer_selftest(type);
1644         if (ret < 0)
1645                 goto out;
1646
1647         type->next = trace_types;
1648         trace_types = type;
1649         add_tracer_options(&global_trace, type);
1650
1651  out:
1652         tracing_selftest_running = false;
1653         mutex_unlock(&trace_types_lock);
1654
1655         if (ret || !default_bootup_tracer)
1656                 goto out_unlock;
1657
1658         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1659                 goto out_unlock;
1660
1661         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1662         /* Do we want this tracer to start on bootup? */
1663         tracing_set_tracer(&global_trace, type->name);
1664         default_bootup_tracer = NULL;
1665
1666         apply_trace_boot_options();
1667
1668         /* disable other selftests, since this will break it. */
1669         tracing_selftest_disabled = true;
1670 #ifdef CONFIG_FTRACE_STARTUP_TEST
1671         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1672                type->name);
1673 #endif
1674
1675  out_unlock:
1676         return ret;
1677 }
1678
1679 void tracing_reset(struct trace_buffer *buf, int cpu)
1680 {
1681         struct ring_buffer *buffer = buf->buffer;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690         ring_buffer_reset_cpu(buffer, cpu);
1691
1692         ring_buffer_record_enable(buffer);
1693 }
1694
1695 void tracing_reset_online_cpus(struct trace_buffer *buf)
1696 {
1697         struct ring_buffer *buffer = buf->buffer;
1698         int cpu;
1699
1700         if (!buffer)
1701                 return;
1702
1703         ring_buffer_record_disable(buffer);
1704
1705         /* Make sure all commits have finished */
1706         synchronize_sched();
1707
1708         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1709
1710         for_each_online_cpu(cpu)
1711                 ring_buffer_reset_cpu(buffer, cpu);
1712
1713         ring_buffer_record_enable(buffer);
1714 }
1715
1716 /* Must have trace_types_lock held */
1717 void tracing_reset_all_online_cpus(void)
1718 {
1719         struct trace_array *tr;
1720
1721         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1722                 if (!tr->clear_trace)
1723                         continue;
1724                 tr->clear_trace = false;
1725                 tracing_reset_online_cpus(&tr->trace_buffer);
1726 #ifdef CONFIG_TRACER_MAX_TRACE
1727                 tracing_reset_online_cpus(&tr->max_buffer);
1728 #endif
1729         }
1730 }
1731
1732 /*
1733  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1734  * is the tgid last observed corresponding to pid=i.
1735  */
1736 static int *tgid_map;
1737
1738 /* The maximum valid index into tgid_map. */
1739 static size_t tgid_map_max;
1740
1741 #define SAVED_CMDLINES_DEFAULT 128
1742 #define NO_CMDLINE_MAP UINT_MAX
1743 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1744 struct saved_cmdlines_buffer {
1745         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1746         unsigned *map_cmdline_to_pid;
1747         unsigned cmdline_num;
1748         int cmdline_idx;
1749         char *saved_cmdlines;
1750 };
1751 static struct saved_cmdlines_buffer *savedcmd;
1752
1753 static inline char *get_saved_cmdlines(int idx)
1754 {
1755         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1756 }
1757
1758 static inline void set_cmdline(int idx, const char *cmdline)
1759 {
1760         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1761 }
1762
1763 static int allocate_cmdlines_buffer(unsigned int val,
1764                                     struct saved_cmdlines_buffer *s)
1765 {
1766         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1767                                         GFP_KERNEL);
1768         if (!s->map_cmdline_to_pid)
1769                 return -ENOMEM;
1770
1771         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1772         if (!s->saved_cmdlines) {
1773                 kfree(s->map_cmdline_to_pid);
1774                 return -ENOMEM;
1775         }
1776
1777         s->cmdline_idx = 0;
1778         s->cmdline_num = val;
1779         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1780                sizeof(s->map_pid_to_cmdline));
1781         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1782                val * sizeof(*s->map_cmdline_to_pid));
1783
1784         return 0;
1785 }
1786
1787 static int trace_create_savedcmd(void)
1788 {
1789         int ret;
1790
1791         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1792         if (!savedcmd)
1793                 return -ENOMEM;
1794
1795         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1796         if (ret < 0) {
1797                 kfree(savedcmd);
1798                 savedcmd = NULL;
1799                 return -ENOMEM;
1800         }
1801
1802         return 0;
1803 }
1804
1805 int is_tracing_stopped(void)
1806 {
1807         return global_trace.stop_count;
1808 }
1809
1810 /**
1811  * tracing_start - quick start of the tracer
1812  *
1813  * If tracing is enabled but was stopped by tracing_stop,
1814  * this will start the tracer back up.
1815  */
1816 void tracing_start(void)
1817 {
1818         struct ring_buffer *buffer;
1819         unsigned long flags;
1820
1821         if (tracing_disabled)
1822                 return;
1823
1824         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1825         if (--global_trace.stop_count) {
1826                 if (global_trace.stop_count < 0) {
1827                         /* Someone screwed up their debugging */
1828                         WARN_ON_ONCE(1);
1829                         global_trace.stop_count = 0;
1830                 }
1831                 goto out;
1832         }
1833
1834         /* Prevent the buffers from switching */
1835         arch_spin_lock(&global_trace.max_lock);
1836
1837         buffer = global_trace.trace_buffer.buffer;
1838         if (buffer)
1839                 ring_buffer_record_enable(buffer);
1840
1841 #ifdef CONFIG_TRACER_MAX_TRACE
1842         buffer = global_trace.max_buffer.buffer;
1843         if (buffer)
1844                 ring_buffer_record_enable(buffer);
1845 #endif
1846
1847         arch_spin_unlock(&global_trace.max_lock);
1848
1849  out:
1850         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1851 }
1852
1853 static void tracing_start_tr(struct trace_array *tr)
1854 {
1855         struct ring_buffer *buffer;
1856         unsigned long flags;
1857
1858         if (tracing_disabled)
1859                 return;
1860
1861         /* If global, we need to also start the max tracer */
1862         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1863                 return tracing_start();
1864
1865         raw_spin_lock_irqsave(&tr->start_lock, flags);
1866
1867         if (--tr->stop_count) {
1868                 if (tr->stop_count < 0) {
1869                         /* Someone screwed up their debugging */
1870                         WARN_ON_ONCE(1);
1871                         tr->stop_count = 0;
1872                 }
1873                 goto out;
1874         }
1875
1876         buffer = tr->trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_enable(buffer);
1879
1880  out:
1881         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1882 }
1883
1884 /**
1885  * tracing_stop - quick stop of the tracer
1886  *
1887  * Light weight way to stop tracing. Use in conjunction with
1888  * tracing_start.
1889  */
1890 void tracing_stop(void)
1891 {
1892         struct ring_buffer *buffer;
1893         unsigned long flags;
1894
1895         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1896         if (global_trace.stop_count++)
1897                 goto out;
1898
1899         /* Prevent the buffers from switching */
1900         arch_spin_lock(&global_trace.max_lock);
1901
1902         buffer = global_trace.trace_buffer.buffer;
1903         if (buffer)
1904                 ring_buffer_record_disable(buffer);
1905
1906 #ifdef CONFIG_TRACER_MAX_TRACE
1907         buffer = global_trace.max_buffer.buffer;
1908         if (buffer)
1909                 ring_buffer_record_disable(buffer);
1910 #endif
1911
1912         arch_spin_unlock(&global_trace.max_lock);
1913
1914  out:
1915         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1916 }
1917
1918 static void tracing_stop_tr(struct trace_array *tr)
1919 {
1920         struct ring_buffer *buffer;
1921         unsigned long flags;
1922
1923         /* If global, we need to also stop the max tracer */
1924         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1925                 return tracing_stop();
1926
1927         raw_spin_lock_irqsave(&tr->start_lock, flags);
1928         if (tr->stop_count++)
1929                 goto out;
1930
1931         buffer = tr->trace_buffer.buffer;
1932         if (buffer)
1933                 ring_buffer_record_disable(buffer);
1934
1935  out:
1936         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1937 }
1938
1939 static int trace_save_cmdline(struct task_struct *tsk)
1940 {
1941         unsigned tpid, idx;
1942
1943         /* treat recording of idle task as a success */
1944         if (!tsk->pid)
1945                 return 1;
1946
1947         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1948
1949         /*
1950          * It's not the end of the world if we don't get
1951          * the lock, but we also don't want to spin
1952          * nor do we want to disable interrupts,
1953          * so if we miss here, then better luck next time.
1954          */
1955         if (!arch_spin_trylock(&trace_cmdline_lock))
1956                 return 0;
1957
1958         idx = savedcmd->map_pid_to_cmdline[tpid];
1959         if (idx == NO_CMDLINE_MAP) {
1960                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1961
1962                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1963                 savedcmd->cmdline_idx = idx;
1964         }
1965
1966         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1967         set_cmdline(idx, tsk->comm);
1968
1969         arch_spin_unlock(&trace_cmdline_lock);
1970
1971         return 1;
1972 }
1973
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976         unsigned map;
1977         int tpid;
1978
1979         if (!pid) {
1980                 strcpy(comm, "<idle>");
1981                 return;
1982         }
1983
1984         if (WARN_ON_ONCE(pid < 0)) {
1985                 strcpy(comm, "<XXX>");
1986                 return;
1987         }
1988
1989         tpid = pid & (PID_MAX_DEFAULT - 1);
1990         map = savedcmd->map_pid_to_cmdline[tpid];
1991         if (map != NO_CMDLINE_MAP) {
1992                 tpid = savedcmd->map_cmdline_to_pid[map];
1993                 if (tpid == pid) {
1994                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1995                         return;
1996                 }
1997         }
1998         strcpy(comm, "<...>");
1999 }
2000
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003         preempt_disable();
2004         arch_spin_lock(&trace_cmdline_lock);
2005
2006         __trace_find_cmdline(pid, comm);
2007
2008         arch_spin_unlock(&trace_cmdline_lock);
2009         preempt_enable();
2010 }
2011
2012 static int *trace_find_tgid_ptr(int pid)
2013 {
2014         /*
2015          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2016          * if we observe a non-NULL tgid_map then we also observe the correct
2017          * tgid_map_max.
2018          */
2019         int *map = smp_load_acquire(&tgid_map);
2020
2021         if (unlikely(!map || pid > tgid_map_max))
2022                 return NULL;
2023
2024         return &map[pid];
2025 }
2026
2027 int trace_find_tgid(int pid)
2028 {
2029         int *ptr = trace_find_tgid_ptr(pid);
2030
2031         return ptr ? *ptr : 0;
2032 }
2033
2034 static int trace_save_tgid(struct task_struct *tsk)
2035 {
2036         int *ptr;
2037
2038         /* treat recording of idle task as a success */
2039         if (!tsk->pid)
2040                 return 1;
2041
2042         ptr = trace_find_tgid_ptr(tsk->pid);
2043         if (!ptr)
2044                 return 0;
2045
2046         *ptr = tsk->tgid;
2047         return 1;
2048 }
2049
2050 static bool tracing_record_taskinfo_skip(int flags)
2051 {
2052         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2053                 return true;
2054         if (!__this_cpu_read(trace_taskinfo_save))
2055                 return true;
2056         return false;
2057 }
2058
2059 /**
2060  * tracing_record_taskinfo - record the task info of a task
2061  *
2062  * @task  - task to record
2063  * @flags - TRACE_RECORD_CMDLINE for recording comm
2064  *        - TRACE_RECORD_TGID for recording tgid
2065  */
2066 void tracing_record_taskinfo(struct task_struct *task, int flags)
2067 {
2068         bool done;
2069
2070         if (tracing_record_taskinfo_skip(flags))
2071                 return;
2072
2073         /*
2074          * Record as much task information as possible. If some fail, continue
2075          * to try to record the others.
2076          */
2077         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2078         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2079
2080         /* If recording any information failed, retry again soon. */
2081         if (!done)
2082                 return;
2083
2084         __this_cpu_write(trace_taskinfo_save, false);
2085 }
2086
2087 /**
2088  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2089  *
2090  * @prev - previous task during sched_switch
2091  * @next - next task during sched_switch
2092  * @flags - TRACE_RECORD_CMDLINE for recording comm
2093  *          TRACE_RECORD_TGID for recording tgid
2094  */
2095 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2096                                           struct task_struct *next, int flags)
2097 {
2098         bool done;
2099
2100         if (tracing_record_taskinfo_skip(flags))
2101                 return;
2102
2103         /*
2104          * Record as much task information as possible. If some fail, continue
2105          * to try to record the others.
2106          */
2107         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2108         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2109         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2110         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2111
2112         /* If recording any information failed, retry again soon. */
2113         if (!done)
2114                 return;
2115
2116         __this_cpu_write(trace_taskinfo_save, false);
2117 }
2118
2119 /* Helpers to record a specific task information */
2120 void tracing_record_cmdline(struct task_struct *task)
2121 {
2122         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2123 }
2124
2125 void tracing_record_tgid(struct task_struct *task)
2126 {
2127         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2128 }
2129
2130 /*
2131  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2132  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2133  * simplifies those functions and keeps them in sync.
2134  */
2135 enum print_line_t trace_handle_return(struct trace_seq *s)
2136 {
2137         return trace_seq_has_overflowed(s) ?
2138                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2139 }
2140 EXPORT_SYMBOL_GPL(trace_handle_return);
2141
2142 void
2143 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2144                              int pc)
2145 {
2146         struct task_struct *tsk = current;
2147
2148         entry->preempt_count            = pc & 0xff;
2149         entry->pid                      = (tsk) ? tsk->pid : 0;
2150         entry->flags =
2151 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2152                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2153 #else
2154                 TRACE_FLAG_IRQS_NOSUPPORT |
2155 #endif
2156                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2157                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2158                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2159                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2160                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2161 }
2162 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2163
2164 struct ring_buffer_event *
2165 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2166                           int type,
2167                           unsigned long len,
2168                           unsigned long flags, int pc)
2169 {
2170         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2171 }
2172
2173 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2174 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2175 static int trace_buffered_event_ref;
2176
2177 /**
2178  * trace_buffered_event_enable - enable buffering events
2179  *
2180  * When events are being filtered, it is quicker to use a temporary
2181  * buffer to write the event data into if there's a likely chance
2182  * that it will not be committed. The discard of the ring buffer
2183  * is not as fast as committing, and is much slower than copying
2184  * a commit.
2185  *
2186  * When an event is to be filtered, allocate per cpu buffers to
2187  * write the event data into, and if the event is filtered and discarded
2188  * it is simply dropped, otherwise, the entire data is to be committed
2189  * in one shot.
2190  */
2191 void trace_buffered_event_enable(void)
2192 {
2193         struct ring_buffer_event *event;
2194         struct page *page;
2195         int cpu;
2196
2197         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2198
2199         if (trace_buffered_event_ref++)
2200                 return;
2201
2202         for_each_tracing_cpu(cpu) {
2203                 page = alloc_pages_node(cpu_to_node(cpu),
2204                                         GFP_KERNEL | __GFP_NORETRY, 0);
2205                 if (!page)
2206                         goto failed;
2207
2208                 event = page_address(page);
2209                 memset(event, 0, sizeof(*event));
2210
2211                 per_cpu(trace_buffered_event, cpu) = event;
2212
2213                 preempt_disable();
2214                 if (cpu == smp_processor_id() &&
2215                     this_cpu_read(trace_buffered_event) !=
2216                     per_cpu(trace_buffered_event, cpu))
2217                         WARN_ON_ONCE(1);
2218                 preempt_enable();
2219         }
2220
2221         return;
2222  failed:
2223         trace_buffered_event_disable();
2224 }
2225
2226 static void enable_trace_buffered_event(void *data)
2227 {
2228         /* Probably not needed, but do it anyway */
2229         smp_rmb();
2230         this_cpu_dec(trace_buffered_event_cnt);
2231 }
2232
2233 static void disable_trace_buffered_event(void *data)
2234 {
2235         this_cpu_inc(trace_buffered_event_cnt);
2236 }
2237
2238 /**
2239  * trace_buffered_event_disable - disable buffering events
2240  *
2241  * When a filter is removed, it is faster to not use the buffered
2242  * events, and to commit directly into the ring buffer. Free up
2243  * the temp buffers when there are no more users. This requires
2244  * special synchronization with current events.
2245  */
2246 void trace_buffered_event_disable(void)
2247 {
2248         int cpu;
2249
2250         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2251
2252         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2253                 return;
2254
2255         if (--trace_buffered_event_ref)
2256                 return;
2257
2258         preempt_disable();
2259         /* For each CPU, set the buffer as used. */
2260         smp_call_function_many(tracing_buffer_mask,
2261                                disable_trace_buffered_event, NULL, 1);
2262         preempt_enable();
2263
2264         /* Wait for all current users to finish */
2265         synchronize_sched();
2266
2267         for_each_tracing_cpu(cpu) {
2268                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2269                 per_cpu(trace_buffered_event, cpu) = NULL;
2270         }
2271         /*
2272          * Make sure trace_buffered_event is NULL before clearing
2273          * trace_buffered_event_cnt.
2274          */
2275         smp_wmb();
2276
2277         preempt_disable();
2278         /* Do the work on each cpu */
2279         smp_call_function_many(tracing_buffer_mask,
2280                                enable_trace_buffered_event, NULL, 1);
2281         preempt_enable();
2282 }
2283
2284 static struct ring_buffer *temp_buffer;
2285
2286 struct ring_buffer_event *
2287 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2288                           struct trace_event_file *trace_file,
2289                           int type, unsigned long len,
2290                           unsigned long flags, int pc)
2291 {
2292         struct ring_buffer_event *entry;
2293         int val;
2294
2295         *current_rb = trace_file->tr->trace_buffer.buffer;
2296
2297         if ((trace_file->flags &
2298              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2299             (entry = this_cpu_read(trace_buffered_event))) {
2300                 /* Try to use the per cpu buffer first */
2301                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2302                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2303                         trace_event_setup(entry, type, flags, pc);
2304                         entry->array[0] = len;
2305                         return entry;
2306                 }
2307                 this_cpu_dec(trace_buffered_event_cnt);
2308         }
2309
2310         entry = __trace_buffer_lock_reserve(*current_rb,
2311                                             type, len, flags, pc);
2312         /*
2313          * If tracing is off, but we have triggers enabled
2314          * we still need to look at the event data. Use the temp_buffer
2315          * to store the trace event for the tigger to use. It's recusive
2316          * safe and will not be recorded anywhere.
2317          */
2318         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2319                 *current_rb = temp_buffer;
2320                 entry = __trace_buffer_lock_reserve(*current_rb,
2321                                                     type, len, flags, pc);
2322         }
2323         return entry;
2324 }
2325 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2326
2327 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2328 static DEFINE_MUTEX(tracepoint_printk_mutex);
2329
2330 static void output_printk(struct trace_event_buffer *fbuffer)
2331 {
2332         struct trace_event_call *event_call;
2333         struct trace_event *event;
2334         unsigned long flags;
2335         struct trace_iterator *iter = tracepoint_print_iter;
2336
2337         /* We should never get here if iter is NULL */
2338         if (WARN_ON_ONCE(!iter))
2339                 return;
2340
2341         event_call = fbuffer->trace_file->event_call;
2342         if (!event_call || !event_call->event.funcs ||
2343             !event_call->event.funcs->trace)
2344                 return;
2345
2346         event = &fbuffer->trace_file->event_call->event;
2347
2348         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2349         trace_seq_init(&iter->seq);
2350         iter->ent = fbuffer->entry;
2351         event_call->event.funcs->trace(iter, 0, event);
2352         trace_seq_putc(&iter->seq, 0);
2353         printk("%s", iter->seq.buffer);
2354
2355         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2356 }
2357
2358 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2359                              void __user *buffer, size_t *lenp,
2360                              loff_t *ppos)
2361 {
2362         int save_tracepoint_printk;
2363         int ret;
2364
2365         mutex_lock(&tracepoint_printk_mutex);
2366         save_tracepoint_printk = tracepoint_printk;
2367
2368         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2369
2370         /*
2371          * This will force exiting early, as tracepoint_printk
2372          * is always zero when tracepoint_printk_iter is not allocated
2373          */
2374         if (!tracepoint_print_iter)
2375                 tracepoint_printk = 0;
2376
2377         if (save_tracepoint_printk == tracepoint_printk)
2378                 goto out;
2379
2380         if (tracepoint_printk)
2381                 static_key_enable(&tracepoint_printk_key.key);
2382         else
2383                 static_key_disable(&tracepoint_printk_key.key);
2384
2385  out:
2386         mutex_unlock(&tracepoint_printk_mutex);
2387
2388         return ret;
2389 }
2390
2391 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2392 {
2393         if (static_key_false(&tracepoint_printk_key.key))
2394                 output_printk(fbuffer);
2395
2396         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2397                                     fbuffer->event, fbuffer->entry,
2398                                     fbuffer->flags, fbuffer->pc);
2399 }
2400 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2401
2402 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2403                                      struct ring_buffer *buffer,
2404                                      struct ring_buffer_event *event,
2405                                      unsigned long flags, int pc,
2406                                      struct pt_regs *regs)
2407 {
2408         __buffer_unlock_commit(buffer, event);
2409
2410         /*
2411          * If regs is not set, then skip the following callers:
2412          *   trace_buffer_unlock_commit_regs
2413          *   event_trigger_unlock_commit
2414          *   trace_event_buffer_commit
2415          *   trace_event_raw_event_sched_switch
2416          * Note, we can still get here via blktrace, wakeup tracer
2417          * and mmiotrace, but that's ok if they lose a function or
2418          * two. They are that meaningful.
2419          */
2420         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2421         ftrace_trace_userstack(tr, buffer, flags, pc);
2422 }
2423
2424 /*
2425  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2426  */
2427 void
2428 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2429                                    struct ring_buffer_event *event)
2430 {
2431         __buffer_unlock_commit(buffer, event);
2432 }
2433
2434 static void
2435 trace_process_export(struct trace_export *export,
2436                struct ring_buffer_event *event)
2437 {
2438         struct trace_entry *entry;
2439         unsigned int size = 0;
2440
2441         entry = ring_buffer_event_data(event);
2442         size = ring_buffer_event_length(event);
2443         export->write(entry, size);
2444 }
2445
2446 static DEFINE_MUTEX(ftrace_export_lock);
2447
2448 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2449
2450 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2451
2452 static inline void ftrace_exports_enable(void)
2453 {
2454         static_branch_enable(&ftrace_exports_enabled);
2455 }
2456
2457 static inline void ftrace_exports_disable(void)
2458 {
2459         static_branch_disable(&ftrace_exports_enabled);
2460 }
2461
2462 void ftrace_exports(struct ring_buffer_event *event)
2463 {
2464         struct trace_export *export;
2465
2466         preempt_disable_notrace();
2467
2468         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2469         while (export) {
2470                 trace_process_export(export, event);
2471                 export = rcu_dereference_raw_notrace(export->next);
2472         }
2473
2474         preempt_enable_notrace();
2475 }
2476
2477 static inline void
2478 add_trace_export(struct trace_export **list, struct trace_export *export)
2479 {
2480         rcu_assign_pointer(export->next, *list);
2481         /*
2482          * We are entering export into the list but another
2483          * CPU might be walking that list. We need to make sure
2484          * the export->next pointer is valid before another CPU sees
2485          * the export pointer included into the list.
2486          */
2487         rcu_assign_pointer(*list, export);
2488 }
2489
2490 static inline int
2491 rm_trace_export(struct trace_export **list, struct trace_export *export)
2492 {
2493         struct trace_export **p;
2494
2495         for (p = list; *p != NULL; p = &(*p)->next)
2496                 if (*p == export)
2497                         break;
2498
2499         if (*p != export)
2500                 return -1;
2501
2502         rcu_assign_pointer(*p, (*p)->next);
2503
2504         return 0;
2505 }
2506
2507 static inline void
2508 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2509 {
2510         if (*list == NULL)
2511                 ftrace_exports_enable();
2512
2513         add_trace_export(list, export);
2514 }
2515
2516 static inline int
2517 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2518 {
2519         int ret;
2520
2521         ret = rm_trace_export(list, export);
2522         if (*list == NULL)
2523                 ftrace_exports_disable();
2524
2525         return ret;
2526 }
2527
2528 int register_ftrace_export(struct trace_export *export)
2529 {
2530         if (WARN_ON_ONCE(!export->write))
2531                 return -1;
2532
2533         mutex_lock(&ftrace_export_lock);
2534
2535         add_ftrace_export(&ftrace_exports_list, export);
2536
2537         mutex_unlock(&ftrace_export_lock);
2538
2539         return 0;
2540 }
2541 EXPORT_SYMBOL_GPL(register_ftrace_export);
2542
2543 int unregister_ftrace_export(struct trace_export *export)
2544 {
2545         int ret;
2546
2547         mutex_lock(&ftrace_export_lock);
2548
2549         ret = rm_ftrace_export(&ftrace_exports_list, export);
2550
2551         mutex_unlock(&ftrace_export_lock);
2552
2553         return ret;
2554 }
2555 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2556
2557 void
2558 trace_function(struct trace_array *tr,
2559                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2560                int pc)
2561 {
2562         struct trace_event_call *call = &event_function;
2563         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2564         struct ring_buffer_event *event;
2565         struct ftrace_entry *entry;
2566
2567         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2568                                             flags, pc);
2569         if (!event)
2570                 return;
2571         entry   = ring_buffer_event_data(event);
2572         entry->ip                       = ip;
2573         entry->parent_ip                = parent_ip;
2574
2575         if (!call_filter_check_discard(call, entry, buffer, event)) {
2576                 if (static_branch_unlikely(&ftrace_exports_enabled))
2577                         ftrace_exports(event);
2578                 __buffer_unlock_commit(buffer, event);
2579         }
2580 }
2581
2582 #ifdef CONFIG_STACKTRACE
2583
2584 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2585 struct ftrace_stack {
2586         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2587 };
2588
2589 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2590 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2591
2592 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2593                                  unsigned long flags,
2594                                  int skip, int pc, struct pt_regs *regs)
2595 {
2596         struct trace_event_call *call = &event_kernel_stack;
2597         struct ring_buffer_event *event;
2598         struct stack_entry *entry;
2599         struct stack_trace trace;
2600         int use_stack;
2601         int size = FTRACE_STACK_ENTRIES;
2602
2603         trace.nr_entries        = 0;
2604         trace.skip              = skip;
2605
2606         /*
2607          * Add two, for this function and the call to save_stack_trace()
2608          * If regs is set, then these functions will not be in the way.
2609          */
2610         if (!regs)
2611                 trace.skip += 2;
2612
2613         /*
2614          * Since events can happen in NMIs there's no safe way to
2615          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2616          * or NMI comes in, it will just have to use the default
2617          * FTRACE_STACK_SIZE.
2618          */
2619         preempt_disable_notrace();
2620
2621         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2622         /*
2623          * We don't need any atomic variables, just a barrier.
2624          * If an interrupt comes in, we don't care, because it would
2625          * have exited and put the counter back to what we want.
2626          * We just need a barrier to keep gcc from moving things
2627          * around.
2628          */
2629         barrier();
2630         if (use_stack == 1) {
2631                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2632                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2633
2634                 if (regs)
2635                         save_stack_trace_regs(regs, &trace);
2636                 else
2637                         save_stack_trace(&trace);
2638
2639                 if (trace.nr_entries > size)
2640                         size = trace.nr_entries;
2641         } else
2642                 /* From now on, use_stack is a boolean */
2643                 use_stack = 0;
2644
2645         size *= sizeof(unsigned long);
2646
2647         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2648                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2649                                     flags, pc);
2650         if (!event)
2651                 goto out;
2652         entry = ring_buffer_event_data(event);
2653
2654         memset(&entry->caller, 0, size);
2655
2656         if (use_stack)
2657                 memcpy(&entry->caller, trace.entries,
2658                        trace.nr_entries * sizeof(unsigned long));
2659         else {
2660                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2661                 trace.entries           = entry->caller;
2662                 if (regs)
2663                         save_stack_trace_regs(regs, &trace);
2664                 else
2665                         save_stack_trace(&trace);
2666         }
2667
2668         entry->size = trace.nr_entries;
2669
2670         if (!call_filter_check_discard(call, entry, buffer, event))
2671                 __buffer_unlock_commit(buffer, event);
2672
2673  out:
2674         /* Again, don't let gcc optimize things here */
2675         barrier();
2676         __this_cpu_dec(ftrace_stack_reserve);
2677         preempt_enable_notrace();
2678
2679 }
2680
2681 static inline void ftrace_trace_stack(struct trace_array *tr,
2682                                       struct ring_buffer *buffer,
2683                                       unsigned long flags,
2684                                       int skip, int pc, struct pt_regs *regs)
2685 {
2686         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2687                 return;
2688
2689         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2690 }
2691
2692 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2693                    int pc)
2694 {
2695         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2696
2697         if (rcu_is_watching()) {
2698                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2699                 return;
2700         }
2701
2702         /*
2703          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2704          * but if the above rcu_is_watching() failed, then the NMI
2705          * triggered someplace critical, and rcu_irq_enter() should
2706          * not be called from NMI.
2707          */
2708         if (unlikely(in_nmi()))
2709                 return;
2710
2711         /*
2712          * It is possible that a function is being traced in a
2713          * location that RCU is not watching. A call to
2714          * rcu_irq_enter() will make sure that it is, but there's
2715          * a few internal rcu functions that could be traced
2716          * where that wont work either. In those cases, we just
2717          * do nothing.
2718          */
2719         if (unlikely(rcu_irq_enter_disabled()))
2720                 return;
2721
2722         rcu_irq_enter_irqson();
2723         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2724         rcu_irq_exit_irqson();
2725 }
2726
2727 /**
2728  * trace_dump_stack - record a stack back trace in the trace buffer
2729  * @skip: Number of functions to skip (helper handlers)
2730  */
2731 void trace_dump_stack(int skip)
2732 {
2733         unsigned long flags;
2734
2735         if (tracing_disabled || tracing_selftest_running)
2736                 return;
2737
2738         local_save_flags(flags);
2739
2740         /*
2741          * Skip 3 more, seems to get us at the caller of
2742          * this function.
2743          */
2744         skip += 3;
2745         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2746                              flags, skip, preempt_count(), NULL);
2747 }
2748
2749 static DEFINE_PER_CPU(int, user_stack_count);
2750
2751 void
2752 ftrace_trace_userstack(struct trace_array *tr,
2753                        struct ring_buffer *buffer, unsigned long flags, int pc)
2754 {
2755         struct trace_event_call *call = &event_user_stack;
2756         struct ring_buffer_event *event;
2757         struct userstack_entry *entry;
2758         struct stack_trace trace;
2759
2760         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2761                 return;
2762
2763         /*
2764          * NMIs can not handle page faults, even with fix ups.
2765          * The save user stack can (and often does) fault.
2766          */
2767         if (unlikely(in_nmi()))
2768                 return;
2769
2770         /*
2771          * prevent recursion, since the user stack tracing may
2772          * trigger other kernel events.
2773          */
2774         preempt_disable();
2775         if (__this_cpu_read(user_stack_count))
2776                 goto out;
2777
2778         __this_cpu_inc(user_stack_count);
2779
2780         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2781                                             sizeof(*entry), flags, pc);
2782         if (!event)
2783                 goto out_drop_count;
2784         entry   = ring_buffer_event_data(event);
2785
2786         entry->tgid             = current->tgid;
2787         memset(&entry->caller, 0, sizeof(entry->caller));
2788
2789         trace.nr_entries        = 0;
2790         trace.max_entries       = FTRACE_STACK_ENTRIES;
2791         trace.skip              = 0;
2792         trace.entries           = entry->caller;
2793
2794         save_stack_trace_user(&trace);
2795         if (!call_filter_check_discard(call, entry, buffer, event))
2796                 __buffer_unlock_commit(buffer, event);
2797
2798  out_drop_count:
2799         __this_cpu_dec(user_stack_count);
2800  out:
2801         preempt_enable();
2802 }
2803
2804 #ifdef UNUSED
2805 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2806 {
2807         ftrace_trace_userstack(tr, flags, preempt_count());
2808 }
2809 #endif /* UNUSED */
2810
2811 #endif /* CONFIG_STACKTRACE */
2812
2813 /* created for use with alloc_percpu */
2814 struct trace_buffer_struct {
2815         int nesting;
2816         char buffer[4][TRACE_BUF_SIZE];
2817 };
2818
2819 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2820
2821 /*
2822  * Thise allows for lockless recording.  If we're nested too deeply, then
2823  * this returns NULL.
2824  */
2825 static char *get_trace_buf(void)
2826 {
2827         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2828
2829         if (!trace_percpu_buffer || buffer->nesting >= 4)
2830                 return NULL;
2831
2832         buffer->nesting++;
2833
2834         /* Interrupts must see nesting incremented before we use the buffer */
2835         barrier();
2836         return &buffer->buffer[buffer->nesting - 1][0];
2837 }
2838
2839 static void put_trace_buf(void)
2840 {
2841         /* Don't let the decrement of nesting leak before this */
2842         barrier();
2843         this_cpu_dec(trace_percpu_buffer->nesting);
2844 }
2845
2846 static int alloc_percpu_trace_buffer(void)
2847 {
2848         struct trace_buffer_struct __percpu *buffers;
2849
2850         buffers = alloc_percpu(struct trace_buffer_struct);
2851         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2852                 return -ENOMEM;
2853
2854         trace_percpu_buffer = buffers;
2855         return 0;
2856 }
2857
2858 static int buffers_allocated;
2859
2860 void trace_printk_init_buffers(void)
2861 {
2862         if (buffers_allocated)
2863                 return;
2864
2865         if (alloc_percpu_trace_buffer())
2866                 return;
2867
2868         /* trace_printk() is for debug use only. Don't use it in production. */
2869
2870         pr_warn("\n");
2871         pr_warn("**********************************************************\n");
2872         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2873         pr_warn("**                                                      **\n");
2874         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2875         pr_warn("**                                                      **\n");
2876         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2877         pr_warn("** unsafe for production use.                           **\n");
2878         pr_warn("**                                                      **\n");
2879         pr_warn("** If you see this message and you are not debugging    **\n");
2880         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2881         pr_warn("**                                                      **\n");
2882         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2883         pr_warn("**********************************************************\n");
2884
2885         /* Expand the buffers to set size */
2886         tracing_update_buffers();
2887
2888         buffers_allocated = 1;
2889
2890         /*
2891          * trace_printk_init_buffers() can be called by modules.
2892          * If that happens, then we need to start cmdline recording
2893          * directly here. If the global_trace.buffer is already
2894          * allocated here, then this was called by module code.
2895          */
2896         if (global_trace.trace_buffer.buffer)
2897                 tracing_start_cmdline_record();
2898 }
2899
2900 void trace_printk_start_comm(void)
2901 {
2902         /* Start tracing comms if trace printk is set */
2903         if (!buffers_allocated)
2904                 return;
2905         tracing_start_cmdline_record();
2906 }
2907
2908 static void trace_printk_start_stop_comm(int enabled)
2909 {
2910         if (!buffers_allocated)
2911                 return;
2912
2913         if (enabled)
2914                 tracing_start_cmdline_record();
2915         else
2916                 tracing_stop_cmdline_record();
2917 }
2918
2919 /**
2920  * trace_vbprintk - write binary msg to tracing buffer
2921  *
2922  */
2923 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2924 {
2925         struct trace_event_call *call = &event_bprint;
2926         struct ring_buffer_event *event;
2927         struct ring_buffer *buffer;
2928         struct trace_array *tr = &global_trace;
2929         struct bprint_entry *entry;
2930         unsigned long flags;
2931         char *tbuffer;
2932         int len = 0, size, pc;
2933
2934         if (unlikely(tracing_selftest_running || tracing_disabled))
2935                 return 0;
2936
2937         /* Don't pollute graph traces with trace_vprintk internals */
2938         pause_graph_tracing();
2939
2940         pc = preempt_count();
2941         preempt_disable_notrace();
2942
2943         tbuffer = get_trace_buf();
2944         if (!tbuffer) {
2945                 len = 0;
2946                 goto out_nobuffer;
2947         }
2948
2949         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2950
2951         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2952                 goto out;
2953
2954         local_save_flags(flags);
2955         size = sizeof(*entry) + sizeof(u32) * len;
2956         buffer = tr->trace_buffer.buffer;
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2958                                             flags, pc);
2959         if (!event)
2960                 goto out;
2961         entry = ring_buffer_event_data(event);
2962         entry->ip                       = ip;
2963         entry->fmt                      = fmt;
2964
2965         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2966         if (!call_filter_check_discard(call, entry, buffer, event)) {
2967                 __buffer_unlock_commit(buffer, event);
2968                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2969         }
2970
2971 out:
2972         put_trace_buf();
2973
2974 out_nobuffer:
2975         preempt_enable_notrace();
2976         unpause_graph_tracing();
2977
2978         return len;
2979 }
2980 EXPORT_SYMBOL_GPL(trace_vbprintk);
2981
2982 __printf(3, 0)
2983 static int
2984 __trace_array_vprintk(struct ring_buffer *buffer,
2985                       unsigned long ip, const char *fmt, va_list args)
2986 {
2987         struct trace_event_call *call = &event_print;
2988         struct ring_buffer_event *event;
2989         int len = 0, size, pc;
2990         struct print_entry *entry;
2991         unsigned long flags;
2992         char *tbuffer;
2993
2994         if (tracing_disabled || tracing_selftest_running)
2995                 return 0;
2996
2997         /* Don't pollute graph traces with trace_vprintk internals */
2998         pause_graph_tracing();
2999
3000         pc = preempt_count();
3001         preempt_disable_notrace();
3002
3003
3004         tbuffer = get_trace_buf();
3005         if (!tbuffer) {
3006                 len = 0;
3007                 goto out_nobuffer;
3008         }
3009
3010         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3011
3012         local_save_flags(flags);
3013         size = sizeof(*entry) + len + 1;
3014         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3015                                             flags, pc);
3016         if (!event)
3017                 goto out;
3018         entry = ring_buffer_event_data(event);
3019         entry->ip = ip;
3020
3021         memcpy(&entry->buf, tbuffer, len + 1);
3022         if (!call_filter_check_discard(call, entry, buffer, event)) {
3023                 __buffer_unlock_commit(buffer, event);
3024                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3025         }
3026
3027 out:
3028         put_trace_buf();
3029
3030 out_nobuffer:
3031         preempt_enable_notrace();
3032         unpause_graph_tracing();
3033
3034         return len;
3035 }
3036
3037 __printf(3, 0)
3038 int trace_array_vprintk(struct trace_array *tr,
3039                         unsigned long ip, const char *fmt, va_list args)
3040 {
3041         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3042 }
3043
3044 __printf(3, 0)
3045 int trace_array_printk(struct trace_array *tr,
3046                        unsigned long ip, const char *fmt, ...)
3047 {
3048         int ret;
3049         va_list ap;
3050
3051         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052                 return 0;
3053
3054         if (!tr)
3055                 return -ENOENT;
3056
3057         va_start(ap, fmt);
3058         ret = trace_array_vprintk(tr, ip, fmt, ap);
3059         va_end(ap);
3060         return ret;
3061 }
3062
3063 __printf(3, 4)
3064 int trace_array_printk_buf(struct ring_buffer *buffer,
3065                            unsigned long ip, const char *fmt, ...)
3066 {
3067         int ret;
3068         va_list ap;
3069
3070         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3071                 return 0;
3072
3073         va_start(ap, fmt);
3074         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3075         va_end(ap);
3076         return ret;
3077 }
3078
3079 __printf(2, 0)
3080 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082         return trace_array_vprintk(&global_trace, ip, fmt, args);
3083 }
3084 EXPORT_SYMBOL_GPL(trace_vprintk);
3085
3086 static void trace_iterator_increment(struct trace_iterator *iter)
3087 {
3088         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3089
3090         iter->idx++;
3091         if (buf_iter)
3092                 ring_buffer_read(buf_iter, NULL);
3093 }
3094
3095 static struct trace_entry *
3096 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3097                 unsigned long *lost_events)
3098 {
3099         struct ring_buffer_event *event;
3100         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3101
3102         if (buf_iter)
3103                 event = ring_buffer_iter_peek(buf_iter, ts);
3104         else
3105                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3106                                          lost_events);
3107
3108         if (event) {
3109                 iter->ent_size = ring_buffer_event_length(event);
3110                 return ring_buffer_event_data(event);
3111         }
3112         iter->ent_size = 0;
3113         return NULL;
3114 }
3115
3116 static struct trace_entry *
3117 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3118                   unsigned long *missing_events, u64 *ent_ts)
3119 {
3120         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3121         struct trace_entry *ent, *next = NULL;
3122         unsigned long lost_events = 0, next_lost = 0;
3123         int cpu_file = iter->cpu_file;
3124         u64 next_ts = 0, ts;
3125         int next_cpu = -1;
3126         int next_size = 0;
3127         int cpu;
3128
3129         /*
3130          * If we are in a per_cpu trace file, don't bother by iterating over
3131          * all cpu and peek directly.
3132          */
3133         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3134                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3135                         return NULL;
3136                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3137                 if (ent_cpu)
3138                         *ent_cpu = cpu_file;
3139
3140                 return ent;
3141         }
3142
3143         for_each_tracing_cpu(cpu) {
3144
3145                 if (ring_buffer_empty_cpu(buffer, cpu))
3146                         continue;
3147
3148                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3149
3150                 /*
3151                  * Pick the entry with the smallest timestamp:
3152                  */
3153                 if (ent && (!next || ts < next_ts)) {
3154                         next = ent;
3155                         next_cpu = cpu;
3156                         next_ts = ts;
3157                         next_lost = lost_events;
3158                         next_size = iter->ent_size;
3159                 }
3160         }
3161
3162         iter->ent_size = next_size;
3163
3164         if (ent_cpu)
3165                 *ent_cpu = next_cpu;
3166
3167         if (ent_ts)
3168                 *ent_ts = next_ts;
3169
3170         if (missing_events)
3171                 *missing_events = next_lost;
3172
3173         return next;
3174 }
3175
3176 /* Find the next real entry, without updating the iterator itself */
3177 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3178                                           int *ent_cpu, u64 *ent_ts)
3179 {
3180         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3181 }
3182
3183 /* Find the next real entry, and increment the iterator to the next entry */
3184 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3185 {
3186         iter->ent = __find_next_entry(iter, &iter->cpu,
3187                                       &iter->lost_events, &iter->ts);
3188
3189         if (iter->ent)
3190                 trace_iterator_increment(iter);
3191
3192         return iter->ent ? iter : NULL;
3193 }
3194
3195 static void trace_consume(struct trace_iterator *iter)
3196 {
3197         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3198                             &iter->lost_events);
3199 }
3200
3201 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3202 {
3203         struct trace_iterator *iter = m->private;
3204         int i = (int)*pos;
3205         void *ent;
3206
3207         WARN_ON_ONCE(iter->leftover);
3208
3209         (*pos)++;
3210
3211         /* can't go backwards */
3212         if (iter->idx > i)
3213                 return NULL;
3214
3215         if (iter->idx < 0)
3216                 ent = trace_find_next_entry_inc(iter);
3217         else
3218                 ent = iter;
3219
3220         while (ent && iter->idx < i)
3221                 ent = trace_find_next_entry_inc(iter);
3222
3223         iter->pos = *pos;
3224
3225         return ent;
3226 }
3227
3228 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3229 {
3230         struct ring_buffer_event *event;
3231         struct ring_buffer_iter *buf_iter;
3232         unsigned long entries = 0;
3233         u64 ts;
3234
3235         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3236
3237         buf_iter = trace_buffer_iter(iter, cpu);
3238         if (!buf_iter)
3239                 return;
3240
3241         ring_buffer_iter_reset(buf_iter);
3242
3243         /*
3244          * We could have the case with the max latency tracers
3245          * that a reset never took place on a cpu. This is evident
3246          * by the timestamp being before the start of the buffer.
3247          */
3248         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3249                 if (ts >= iter->trace_buffer->time_start)
3250                         break;
3251                 entries++;
3252                 ring_buffer_read(buf_iter, NULL);
3253         }
3254
3255         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3256 }
3257
3258 /*
3259  * The current tracer is copied to avoid a global locking
3260  * all around.
3261  */
3262 static void *s_start(struct seq_file *m, loff_t *pos)
3263 {
3264         struct trace_iterator *iter = m->private;
3265         struct trace_array *tr = iter->tr;
3266         int cpu_file = iter->cpu_file;
3267         void *p = NULL;
3268         loff_t l = 0;
3269         int cpu;
3270
3271         /*
3272          * copy the tracer to avoid using a global lock all around.
3273          * iter->trace is a copy of current_trace, the pointer to the
3274          * name may be used instead of a strcmp(), as iter->trace->name
3275          * will point to the same string as current_trace->name.
3276          */
3277         mutex_lock(&trace_types_lock);
3278         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3279                 *iter->trace = *tr->current_trace;
3280         mutex_unlock(&trace_types_lock);
3281
3282 #ifdef CONFIG_TRACER_MAX_TRACE
3283         if (iter->snapshot && iter->trace->use_max_tr)
3284                 return ERR_PTR(-EBUSY);
3285 #endif
3286
3287         if (*pos != iter->pos) {
3288                 iter->ent = NULL;
3289                 iter->cpu = 0;
3290                 iter->idx = -1;
3291
3292                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3293                         for_each_tracing_cpu(cpu)
3294                                 tracing_iter_reset(iter, cpu);
3295                 } else
3296                         tracing_iter_reset(iter, cpu_file);
3297
3298                 iter->leftover = 0;
3299                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3300                         ;
3301
3302         } else {
3303                 /*
3304                  * If we overflowed the seq_file before, then we want
3305                  * to just reuse the trace_seq buffer again.
3306                  */
3307                 if (iter->leftover)
3308                         p = iter;
3309                 else {
3310                         l = *pos - 1;
3311                         p = s_next(m, p, &l);
3312                 }
3313         }
3314
3315         trace_event_read_lock();
3316         trace_access_lock(cpu_file);
3317         return p;
3318 }
3319
3320 static void s_stop(struct seq_file *m, void *p)
3321 {
3322         struct trace_iterator *iter = m->private;
3323
3324 #ifdef CONFIG_TRACER_MAX_TRACE
3325         if (iter->snapshot && iter->trace->use_max_tr)
3326                 return;
3327 #endif
3328
3329         trace_access_unlock(iter->cpu_file);
3330         trace_event_read_unlock();
3331 }
3332
3333 static void
3334 get_total_entries(struct trace_buffer *buf,
3335                   unsigned long *total, unsigned long *entries)
3336 {
3337         unsigned long count;
3338         int cpu;
3339
3340         *total = 0;
3341         *entries = 0;
3342
3343         for_each_tracing_cpu(cpu) {
3344                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3345                 /*
3346                  * If this buffer has skipped entries, then we hold all
3347                  * entries for the trace and we need to ignore the
3348                  * ones before the time stamp.
3349                  */
3350                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3351                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3352                         /* total is the same as the entries */
3353                         *total += count;
3354                 } else
3355                         *total += count +
3356                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3357                 *entries += count;
3358         }
3359 }
3360
3361 static void print_lat_help_header(struct seq_file *m)
3362 {
3363         seq_puts(m, "#                  _------=> CPU#            \n"
3364                     "#                 / _-----=> irqs-off        \n"
3365                     "#                | / _----=> need-resched    \n"
3366                     "#                || / _---=> hardirq/softirq \n"
3367                     "#                ||| / _--=> preempt-depth   \n"
3368                     "#                |||| /     delay            \n"
3369                     "#  cmd     pid   ||||| time  |   caller      \n"
3370                     "#     \\   /      |||||  \\    |   /         \n");
3371 }
3372
3373 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3374 {
3375         unsigned long total;
3376         unsigned long entries;
3377
3378         get_total_entries(buf, &total, &entries);
3379         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3380                    entries, total, num_online_cpus());
3381         seq_puts(m, "#\n");
3382 }
3383
3384 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3385                                    unsigned int flags)
3386 {
3387         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3388
3389         print_event_info(buf, m);
3390
3391         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3392         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3393 }
3394
3395 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3396                                        unsigned int flags)
3397 {
3398         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3399         const char tgid_space[] = "          ";
3400         const char space[] = "  ";
3401
3402         print_event_info(buf, m);
3403
3404         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3405                    tgid ? tgid_space : space);
3406         seq_printf(m, "#                          %s / _----=> need-resched\n",
3407                    tgid ? tgid_space : space);
3408         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3409                    tgid ? tgid_space : space);
3410         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3411                    tgid ? tgid_space : space);
3412         seq_printf(m, "#                          %s||| /     delay\n",
3413                    tgid ? tgid_space : space);
3414         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3415                    tgid ? "   TGID   " : space);
3416         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3417                    tgid ? "     |    " : space);
3418 }
3419
3420 void
3421 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3422 {
3423         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3424         struct trace_buffer *buf = iter->trace_buffer;
3425         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3426         struct tracer *type = iter->trace;
3427         unsigned long entries;
3428         unsigned long total;
3429         const char *name = "preemption";
3430
3431         name = type->name;
3432
3433         get_total_entries(buf, &total, &entries);
3434
3435         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3436                    name, UTS_RELEASE);
3437         seq_puts(m, "# -----------------------------------"
3438                  "---------------------------------\n");
3439         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3440                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3441                    nsecs_to_usecs(data->saved_latency),
3442                    entries,
3443                    total,
3444                    buf->cpu,
3445 #if defined(CONFIG_PREEMPT_NONE)
3446                    "server",
3447 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3448                    "desktop",
3449 #elif defined(CONFIG_PREEMPT)
3450                    "preempt",
3451 #else
3452                    "unknown",
3453 #endif
3454                    /* These are reserved for later use */
3455                    0, 0, 0, 0);
3456 #ifdef CONFIG_SMP
3457         seq_printf(m, " #P:%d)\n", num_online_cpus());
3458 #else
3459         seq_puts(m, ")\n");
3460 #endif
3461         seq_puts(m, "#    -----------------\n");
3462         seq_printf(m, "#    | task: %.16s-%d "
3463                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3464                    data->comm, data->pid,
3465                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3466                    data->policy, data->rt_priority);
3467         seq_puts(m, "#    -----------------\n");
3468
3469         if (data->critical_start) {
3470                 seq_puts(m, "#  => started at: ");
3471                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3472                 trace_print_seq(m, &iter->seq);
3473                 seq_puts(m, "\n#  => ended at:   ");
3474                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3475                 trace_print_seq(m, &iter->seq);
3476                 seq_puts(m, "\n#\n");
3477         }
3478
3479         seq_puts(m, "#\n");
3480 }
3481
3482 static void test_cpu_buff_start(struct trace_iterator *iter)
3483 {
3484         struct trace_seq *s = &iter->seq;
3485         struct trace_array *tr = iter->tr;
3486
3487         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3488                 return;
3489
3490         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3491                 return;
3492
3493         if (cpumask_available(iter->started) &&
3494             cpumask_test_cpu(iter->cpu, iter->started))
3495                 return;
3496
3497         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3498                 return;
3499
3500         if (cpumask_available(iter->started))
3501                 cpumask_set_cpu(iter->cpu, iter->started);
3502
3503         /* Don't print started cpu buffer for the first entry of the trace */
3504         if (iter->idx > 1)
3505                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3506                                 iter->cpu);
3507 }
3508
3509 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3510 {
3511         struct trace_array *tr = iter->tr;
3512         struct trace_seq *s = &iter->seq;
3513         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3514         struct trace_entry *entry;
3515         struct trace_event *event;
3516
3517         entry = iter->ent;
3518
3519         test_cpu_buff_start(iter);
3520
3521         event = ftrace_find_event(entry->type);
3522
3523         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3524                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3525                         trace_print_lat_context(iter);
3526                 else
3527                         trace_print_context(iter);
3528         }
3529
3530         if (trace_seq_has_overflowed(s))
3531                 return TRACE_TYPE_PARTIAL_LINE;
3532
3533         if (event)
3534                 return event->funcs->trace(iter, sym_flags, event);
3535
3536         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3537
3538         return trace_handle_return(s);
3539 }
3540
3541 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3542 {
3543         struct trace_array *tr = iter->tr;
3544         struct trace_seq *s = &iter->seq;
3545         struct trace_entry *entry;
3546         struct trace_event *event;
3547
3548         entry = iter->ent;
3549
3550         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3551                 trace_seq_printf(s, "%d %d %llu ",
3552                                  entry->pid, iter->cpu, iter->ts);
3553
3554         if (trace_seq_has_overflowed(s))
3555                 return TRACE_TYPE_PARTIAL_LINE;
3556
3557         event = ftrace_find_event(entry->type);
3558         if (event)
3559                 return event->funcs->raw(iter, 0, event);
3560
3561         trace_seq_printf(s, "%d ?\n", entry->type);
3562
3563         return trace_handle_return(s);
3564 }
3565
3566 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3567 {
3568         struct trace_array *tr = iter->tr;
3569         struct trace_seq *s = &iter->seq;
3570         unsigned char newline = '\n';
3571         struct trace_entry *entry;
3572         struct trace_event *event;
3573
3574         entry = iter->ent;
3575
3576         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3577                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3578                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3579                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3580                 if (trace_seq_has_overflowed(s))
3581                         return TRACE_TYPE_PARTIAL_LINE;
3582         }
3583
3584         event = ftrace_find_event(entry->type);
3585         if (event) {
3586                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3587                 if (ret != TRACE_TYPE_HANDLED)
3588                         return ret;
3589         }
3590
3591         SEQ_PUT_FIELD(s, newline);
3592
3593         return trace_handle_return(s);
3594 }
3595
3596 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3597 {
3598         struct trace_array *tr = iter->tr;
3599         struct trace_seq *s = &iter->seq;
3600         struct trace_entry *entry;
3601         struct trace_event *event;
3602
3603         entry = iter->ent;
3604
3605         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3606                 SEQ_PUT_FIELD(s, entry->pid);
3607                 SEQ_PUT_FIELD(s, iter->cpu);
3608                 SEQ_PUT_FIELD(s, iter->ts);
3609                 if (trace_seq_has_overflowed(s))
3610                         return TRACE_TYPE_PARTIAL_LINE;
3611         }
3612
3613         event = ftrace_find_event(entry->type);
3614         return event ? event->funcs->binary(iter, 0, event) :
3615                 TRACE_TYPE_HANDLED;
3616 }
3617
3618 int trace_empty(struct trace_iterator *iter)
3619 {
3620         struct ring_buffer_iter *buf_iter;
3621         int cpu;
3622
3623         /* If we are looking at one CPU buffer, only check that one */
3624         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3625                 cpu = iter->cpu_file;
3626                 buf_iter = trace_buffer_iter(iter, cpu);
3627                 if (buf_iter) {
3628                         if (!ring_buffer_iter_empty(buf_iter))
3629                                 return 0;
3630                 } else {
3631                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3632                                 return 0;
3633                 }
3634                 return 1;
3635         }
3636
3637         for_each_tracing_cpu(cpu) {
3638                 buf_iter = trace_buffer_iter(iter, cpu);
3639                 if (buf_iter) {
3640                         if (!ring_buffer_iter_empty(buf_iter))
3641                                 return 0;
3642                 } else {
3643                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3644                                 return 0;
3645                 }
3646         }
3647
3648         return 1;
3649 }
3650
3651 /*  Called with trace_event_read_lock() held. */
3652 enum print_line_t print_trace_line(struct trace_iterator *iter)
3653 {
3654         struct trace_array *tr = iter->tr;
3655         unsigned long trace_flags = tr->trace_flags;
3656         enum print_line_t ret;
3657
3658         if (iter->lost_events) {
3659                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3660                                  iter->cpu, iter->lost_events);
3661                 if (trace_seq_has_overflowed(&iter->seq))
3662                         return TRACE_TYPE_PARTIAL_LINE;
3663         }
3664
3665         if (iter->trace && iter->trace->print_line) {
3666                 ret = iter->trace->print_line(iter);
3667                 if (ret != TRACE_TYPE_UNHANDLED)
3668                         return ret;
3669         }
3670
3671         if (iter->ent->type == TRACE_BPUTS &&
3672                         trace_flags & TRACE_ITER_PRINTK &&
3673                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3674                 return trace_print_bputs_msg_only(iter);
3675
3676         if (iter->ent->type == TRACE_BPRINT &&
3677                         trace_flags & TRACE_ITER_PRINTK &&
3678                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3679                 return trace_print_bprintk_msg_only(iter);
3680
3681         if (iter->ent->type == TRACE_PRINT &&
3682                         trace_flags & TRACE_ITER_PRINTK &&
3683                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3684                 return trace_print_printk_msg_only(iter);
3685
3686         if (trace_flags & TRACE_ITER_BIN)
3687                 return print_bin_fmt(iter);
3688
3689         if (trace_flags & TRACE_ITER_HEX)
3690                 return print_hex_fmt(iter);
3691
3692         if (trace_flags & TRACE_ITER_RAW)
3693                 return print_raw_fmt(iter);
3694
3695         return print_trace_fmt(iter);
3696 }
3697
3698 void trace_latency_header(struct seq_file *m)
3699 {
3700         struct trace_iterator *iter = m->private;
3701         struct trace_array *tr = iter->tr;
3702
3703         /* print nothing if the buffers are empty */
3704         if (trace_empty(iter))
3705                 return;
3706
3707         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3708                 print_trace_header(m, iter);
3709
3710         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3711                 print_lat_help_header(m);
3712 }
3713
3714 void trace_default_header(struct seq_file *m)
3715 {
3716         struct trace_iterator *iter = m->private;
3717         struct trace_array *tr = iter->tr;
3718         unsigned long trace_flags = tr->trace_flags;
3719
3720         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3721                 return;
3722
3723         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3724                 /* print nothing if the buffers are empty */
3725                 if (trace_empty(iter))
3726                         return;
3727                 print_trace_header(m, iter);
3728                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3729                         print_lat_help_header(m);
3730         } else {
3731                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3732                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3733                                 print_func_help_header_irq(iter->trace_buffer,
3734                                                            m, trace_flags);
3735                         else
3736                                 print_func_help_header(iter->trace_buffer, m,
3737                                                        trace_flags);
3738                 }
3739         }
3740 }
3741
3742 static void test_ftrace_alive(struct seq_file *m)
3743 {
3744         if (!ftrace_is_dead())
3745                 return;
3746         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3747                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3748 }
3749
3750 #ifdef CONFIG_TRACER_MAX_TRACE
3751 static void show_snapshot_main_help(struct seq_file *m)
3752 {
3753         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3754                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3755                     "#                      Takes a snapshot of the main buffer.\n"
3756                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3757                     "#                      (Doesn't have to be '2' works with any number that\n"
3758                     "#                       is not a '0' or '1')\n");
3759 }
3760
3761 static void show_snapshot_percpu_help(struct seq_file *m)
3762 {
3763         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3764 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3765         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3766                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3767 #else
3768         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3769                     "#                     Must use main snapshot file to allocate.\n");
3770 #endif
3771         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3772                     "#                      (Doesn't have to be '2' works with any number that\n"
3773                     "#                       is not a '0' or '1')\n");
3774 }
3775
3776 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3777 {
3778         if (iter->tr->allocated_snapshot)
3779                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3780         else
3781                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3782
3783         seq_puts(m, "# Snapshot commands:\n");
3784         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3785                 show_snapshot_main_help(m);
3786         else
3787                 show_snapshot_percpu_help(m);
3788 }
3789 #else
3790 /* Should never be called */
3791 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3792 #endif
3793
3794 static int s_show(struct seq_file *m, void *v)
3795 {
3796         struct trace_iterator *iter = v;
3797         int ret;
3798
3799         if (iter->ent == NULL) {
3800                 if (iter->tr) {
3801                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3802                         seq_puts(m, "#\n");
3803                         test_ftrace_alive(m);
3804                 }
3805                 if (iter->snapshot && trace_empty(iter))
3806                         print_snapshot_help(m, iter);
3807                 else if (iter->trace && iter->trace->print_header)
3808                         iter->trace->print_header(m);
3809                 else
3810                         trace_default_header(m);
3811
3812         } else if (iter->leftover) {
3813                 /*
3814                  * If we filled the seq_file buffer earlier, we
3815                  * want to just show it now.
3816                  */
3817                 ret = trace_print_seq(m, &iter->seq);
3818
3819                 /* ret should this time be zero, but you never know */
3820                 iter->leftover = ret;
3821
3822         } else {
3823                 print_trace_line(iter);
3824                 ret = trace_print_seq(m, &iter->seq);
3825                 /*
3826                  * If we overflow the seq_file buffer, then it will
3827                  * ask us for this data again at start up.
3828                  * Use that instead.
3829                  *  ret is 0 if seq_file write succeeded.
3830                  *        -1 otherwise.
3831                  */
3832                 iter->leftover = ret;
3833         }
3834
3835         return 0;
3836 }
3837
3838 /*
3839  * Should be used after trace_array_get(), trace_types_lock
3840  * ensures that i_cdev was already initialized.
3841  */
3842 static inline int tracing_get_cpu(struct inode *inode)
3843 {
3844         if (inode->i_cdev) /* See trace_create_cpu_file() */
3845                 return (long)inode->i_cdev - 1;
3846         return RING_BUFFER_ALL_CPUS;
3847 }
3848
3849 static const struct seq_operations tracer_seq_ops = {
3850         .start          = s_start,
3851         .next           = s_next,
3852         .stop           = s_stop,
3853         .show           = s_show,
3854 };
3855
3856 static struct trace_iterator *
3857 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3858 {
3859         struct trace_array *tr = inode->i_private;
3860         struct trace_iterator *iter;
3861         int cpu;
3862
3863         if (tracing_disabled)
3864                 return ERR_PTR(-ENODEV);
3865
3866         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3867         if (!iter)
3868                 return ERR_PTR(-ENOMEM);
3869
3870         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3871                                     GFP_KERNEL);
3872         if (!iter->buffer_iter)
3873                 goto release;
3874
3875         /*
3876          * We make a copy of the current tracer to avoid concurrent
3877          * changes on it while we are reading.
3878          */
3879         mutex_lock(&trace_types_lock);
3880         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3881         if (!iter->trace)
3882                 goto fail;
3883
3884         *iter->trace = *tr->current_trace;
3885
3886         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3887                 goto fail;
3888
3889         iter->tr = tr;
3890
3891 #ifdef CONFIG_TRACER_MAX_TRACE
3892         /* Currently only the top directory has a snapshot */
3893         if (tr->current_trace->print_max || snapshot)
3894                 iter->trace_buffer = &tr->max_buffer;
3895         else
3896 #endif
3897                 iter->trace_buffer = &tr->trace_buffer;
3898         iter->snapshot = snapshot;
3899         iter->pos = -1;
3900         iter->cpu_file = tracing_get_cpu(inode);
3901         mutex_init(&iter->mutex);
3902
3903         /* Notify the tracer early; before we stop tracing. */
3904         if (iter->trace && iter->trace->open)
3905                 iter->trace->open(iter);
3906
3907         /* Annotate start of buffers if we had overruns */
3908         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3909                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3910
3911         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3912         if (trace_clocks[tr->clock_id].in_ns)
3913                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3914
3915         /* stop the trace while dumping if we are not opening "snapshot" */
3916         if (!iter->snapshot)
3917                 tracing_stop_tr(tr);
3918
3919         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3920                 for_each_tracing_cpu(cpu) {
3921                         iter->buffer_iter[cpu] =
3922                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3923                                                          cpu, GFP_KERNEL);
3924                 }
3925                 ring_buffer_read_prepare_sync();
3926                 for_each_tracing_cpu(cpu) {
3927                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3928                         tracing_iter_reset(iter, cpu);
3929                 }
3930         } else {
3931                 cpu = iter->cpu_file;
3932                 iter->buffer_iter[cpu] =
3933                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3934                                                  cpu, GFP_KERNEL);
3935                 ring_buffer_read_prepare_sync();
3936                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3937                 tracing_iter_reset(iter, cpu);
3938         }
3939
3940         mutex_unlock(&trace_types_lock);
3941
3942         return iter;
3943
3944  fail:
3945         mutex_unlock(&trace_types_lock);
3946         kfree(iter->trace);
3947         kfree(iter->buffer_iter);
3948 release:
3949         seq_release_private(inode, file);
3950         return ERR_PTR(-ENOMEM);
3951 }
3952
3953 int tracing_open_generic(struct inode *inode, struct file *filp)
3954 {
3955         if (tracing_disabled)
3956                 return -ENODEV;
3957
3958         filp->private_data = inode->i_private;
3959         return 0;
3960 }
3961
3962 bool tracing_is_disabled(void)
3963 {
3964         return (tracing_disabled) ? true: false;
3965 }
3966
3967 /*
3968  * Open and update trace_array ref count.
3969  * Must have the current trace_array passed to it.
3970  */
3971 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3972 {
3973         struct trace_array *tr = inode->i_private;
3974
3975         if (tracing_disabled)
3976                 return -ENODEV;
3977
3978         if (trace_array_get(tr) < 0)
3979                 return -ENODEV;
3980
3981         filp->private_data = inode->i_private;
3982
3983         return 0;
3984 }
3985
3986 static int tracing_release(struct inode *inode, struct file *file)
3987 {
3988         struct trace_array *tr = inode->i_private;
3989         struct seq_file *m = file->private_data;
3990         struct trace_iterator *iter;
3991         int cpu;
3992
3993         if (!(file->f_mode & FMODE_READ)) {
3994                 trace_array_put(tr);
3995                 return 0;
3996         }
3997
3998         /* Writes do not use seq_file */
3999         iter = m->private;
4000         mutex_lock(&trace_types_lock);
4001
4002         for_each_tracing_cpu(cpu) {
4003                 if (iter->buffer_iter[cpu])
4004                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4005         }
4006
4007         if (iter->trace && iter->trace->close)
4008                 iter->trace->close(iter);
4009
4010         if (!iter->snapshot)
4011                 /* reenable tracing if it was previously enabled */
4012                 tracing_start_tr(tr);
4013
4014         __trace_array_put(tr);
4015
4016         mutex_unlock(&trace_types_lock);
4017
4018         mutex_destroy(&iter->mutex);
4019         free_cpumask_var(iter->started);
4020         kfree(iter->trace);
4021         kfree(iter->buffer_iter);
4022         seq_release_private(inode, file);
4023
4024         return 0;
4025 }
4026
4027 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4028 {
4029         struct trace_array *tr = inode->i_private;
4030
4031         trace_array_put(tr);
4032         return 0;
4033 }
4034
4035 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4036 {
4037         struct trace_array *tr = inode->i_private;
4038
4039         trace_array_put(tr);
4040
4041         return single_release(inode, file);
4042 }
4043
4044 static int tracing_open(struct inode *inode, struct file *file)
4045 {
4046         struct trace_array *tr = inode->i_private;
4047         struct trace_iterator *iter;
4048         int ret = 0;
4049
4050         if (trace_array_get(tr) < 0)
4051                 return -ENODEV;
4052
4053         /* If this file was open for write, then erase contents */
4054         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4055                 int cpu = tracing_get_cpu(inode);
4056                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4057
4058 #ifdef CONFIG_TRACER_MAX_TRACE
4059                 if (tr->current_trace->print_max)
4060                         trace_buf = &tr->max_buffer;
4061 #endif
4062
4063                 if (cpu == RING_BUFFER_ALL_CPUS)
4064                         tracing_reset_online_cpus(trace_buf);
4065                 else
4066                         tracing_reset(trace_buf, cpu);
4067         }
4068
4069         if (file->f_mode & FMODE_READ) {
4070                 iter = __tracing_open(inode, file, false);
4071                 if (IS_ERR(iter))
4072                         ret = PTR_ERR(iter);
4073                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4074                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4075         }
4076
4077         if (ret < 0)
4078                 trace_array_put(tr);
4079
4080         return ret;
4081 }
4082
4083 /*
4084  * Some tracers are not suitable for instance buffers.
4085  * A tracer is always available for the global array (toplevel)
4086  * or if it explicitly states that it is.
4087  */
4088 static bool
4089 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4090 {
4091         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4092 }
4093
4094 /* Find the next tracer that this trace array may use */
4095 static struct tracer *
4096 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4097 {
4098         while (t && !trace_ok_for_array(t, tr))
4099                 t = t->next;
4100
4101         return t;
4102 }
4103
4104 static void *
4105 t_next(struct seq_file *m, void *v, loff_t *pos)
4106 {
4107         struct trace_array *tr = m->private;
4108         struct tracer *t = v;
4109
4110         (*pos)++;
4111
4112         if (t)
4113                 t = get_tracer_for_array(tr, t->next);
4114
4115         return t;
4116 }
4117
4118 static void *t_start(struct seq_file *m, loff_t *pos)
4119 {
4120         struct trace_array *tr = m->private;
4121         struct tracer *t;
4122         loff_t l = 0;
4123
4124         mutex_lock(&trace_types_lock);
4125
4126         t = get_tracer_for_array(tr, trace_types);
4127         for (; t && l < *pos; t = t_next(m, t, &l))
4128                         ;
4129
4130         return t;
4131 }
4132
4133 static void t_stop(struct seq_file *m, void *p)
4134 {
4135         mutex_unlock(&trace_types_lock);
4136 }
4137
4138 static int t_show(struct seq_file *m, void *v)
4139 {
4140         struct tracer *t = v;
4141
4142         if (!t)
4143                 return 0;
4144
4145         seq_puts(m, t->name);
4146         if (t->next)
4147                 seq_putc(m, ' ');
4148         else
4149                 seq_putc(m, '\n');
4150
4151         return 0;
4152 }
4153
4154 static const struct seq_operations show_traces_seq_ops = {
4155         .start          = t_start,
4156         .next           = t_next,
4157         .stop           = t_stop,
4158         .show           = t_show,
4159 };
4160
4161 static int show_traces_open(struct inode *inode, struct file *file)
4162 {
4163         struct trace_array *tr = inode->i_private;
4164         struct seq_file *m;
4165         int ret;
4166
4167         if (tracing_disabled)
4168                 return -ENODEV;
4169
4170         if (trace_array_get(tr) < 0)
4171                 return -ENODEV;
4172
4173         ret = seq_open(file, &show_traces_seq_ops);
4174         if (ret) {
4175                 trace_array_put(tr);
4176                 return ret;
4177         }
4178
4179         m = file->private_data;
4180         m->private = tr;
4181
4182         return 0;
4183 }
4184
4185 static int show_traces_release(struct inode *inode, struct file *file)
4186 {
4187         struct trace_array *tr = inode->i_private;
4188
4189         trace_array_put(tr);
4190         return seq_release(inode, file);
4191 }
4192
4193 static ssize_t
4194 tracing_write_stub(struct file *filp, const char __user *ubuf,
4195                    size_t count, loff_t *ppos)
4196 {
4197         return count;
4198 }
4199
4200 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4201 {
4202         int ret;
4203
4204         if (file->f_mode & FMODE_READ)
4205                 ret = seq_lseek(file, offset, whence);
4206         else
4207                 file->f_pos = ret = 0;
4208
4209         return ret;
4210 }
4211
4212 static const struct file_operations tracing_fops = {
4213         .open           = tracing_open,
4214         .read           = seq_read,
4215         .write          = tracing_write_stub,
4216         .llseek         = tracing_lseek,
4217         .release        = tracing_release,
4218 };
4219
4220 static const struct file_operations show_traces_fops = {
4221         .open           = show_traces_open,
4222         .read           = seq_read,
4223         .llseek         = seq_lseek,
4224         .release        = show_traces_release,
4225 };
4226
4227 static ssize_t
4228 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4229                      size_t count, loff_t *ppos)
4230 {
4231         struct trace_array *tr = file_inode(filp)->i_private;
4232         char *mask_str;
4233         int len;
4234
4235         len = snprintf(NULL, 0, "%*pb\n",
4236                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4237         mask_str = kmalloc(len, GFP_KERNEL);
4238         if (!mask_str)
4239                 return -ENOMEM;
4240
4241         len = snprintf(mask_str, len, "%*pb\n",
4242                        cpumask_pr_args(tr->tracing_cpumask));
4243         if (len >= count) {
4244                 count = -EINVAL;
4245                 goto out_err;
4246         }
4247         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4248
4249 out_err:
4250         kfree(mask_str);
4251
4252         return count;
4253 }
4254
4255 static ssize_t
4256 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4257                       size_t count, loff_t *ppos)
4258 {
4259         struct trace_array *tr = file_inode(filp)->i_private;
4260         cpumask_var_t tracing_cpumask_new;
4261         int err, cpu;
4262
4263         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4264                 return -ENOMEM;
4265
4266         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4267         if (err)
4268                 goto err_unlock;
4269
4270         local_irq_disable();
4271         arch_spin_lock(&tr->max_lock);
4272         for_each_tracing_cpu(cpu) {
4273                 /*
4274                  * Increase/decrease the disabled counter if we are
4275                  * about to flip a bit in the cpumask:
4276                  */
4277                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4278                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4279                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4280                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4281                 }
4282                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4283                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4284                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4285                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4286                 }
4287         }
4288         arch_spin_unlock(&tr->max_lock);
4289         local_irq_enable();
4290
4291         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4292         free_cpumask_var(tracing_cpumask_new);
4293
4294         return count;
4295
4296 err_unlock:
4297         free_cpumask_var(tracing_cpumask_new);
4298
4299         return err;
4300 }
4301
4302 static const struct file_operations tracing_cpumask_fops = {
4303         .open           = tracing_open_generic_tr,
4304         .read           = tracing_cpumask_read,
4305         .write          = tracing_cpumask_write,
4306         .release        = tracing_release_generic_tr,
4307         .llseek         = generic_file_llseek,
4308 };
4309
4310 static int tracing_trace_options_show(struct seq_file *m, void *v)
4311 {
4312         struct tracer_opt *trace_opts;
4313         struct trace_array *tr = m->private;
4314         u32 tracer_flags;
4315         int i;
4316
4317         mutex_lock(&trace_types_lock);
4318         tracer_flags = tr->current_trace->flags->val;
4319         trace_opts = tr->current_trace->flags->opts;
4320
4321         for (i = 0; trace_options[i]; i++) {
4322                 if (tr->trace_flags & (1 << i))
4323                         seq_printf(m, "%s\n", trace_options[i]);
4324                 else
4325                         seq_printf(m, "no%s\n", trace_options[i]);
4326         }
4327
4328         for (i = 0; trace_opts[i].name; i++) {
4329                 if (tracer_flags & trace_opts[i].bit)
4330                         seq_printf(m, "%s\n", trace_opts[i].name);
4331                 else
4332                         seq_printf(m, "no%s\n", trace_opts[i].name);
4333         }
4334         mutex_unlock(&trace_types_lock);
4335
4336         return 0;
4337 }
4338
4339 static int __set_tracer_option(struct trace_array *tr,
4340                                struct tracer_flags *tracer_flags,
4341                                struct tracer_opt *opts, int neg)
4342 {
4343         struct tracer *trace = tracer_flags->trace;
4344         int ret;
4345
4346         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4347         if (ret)
4348                 return ret;
4349
4350         if (neg)
4351                 tracer_flags->val &= ~opts->bit;
4352         else
4353                 tracer_flags->val |= opts->bit;
4354         return 0;
4355 }
4356
4357 /* Try to assign a tracer specific option */
4358 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4359 {
4360         struct tracer *trace = tr->current_trace;
4361         struct tracer_flags *tracer_flags = trace->flags;
4362         struct tracer_opt *opts = NULL;
4363         int i;
4364
4365         for (i = 0; tracer_flags->opts[i].name; i++) {
4366                 opts = &tracer_flags->opts[i];
4367
4368                 if (strcmp(cmp, opts->name) == 0)
4369                         return __set_tracer_option(tr, trace->flags, opts, neg);
4370         }
4371
4372         return -EINVAL;
4373 }
4374
4375 /* Some tracers require overwrite to stay enabled */
4376 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4377 {
4378         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4379                 return -1;
4380
4381         return 0;
4382 }
4383
4384 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4385 {
4386         int *map;
4387
4388         if ((mask == TRACE_ITER_RECORD_TGID) ||
4389             (mask == TRACE_ITER_RECORD_CMD))
4390                 lockdep_assert_held(&event_mutex);
4391
4392         /* do nothing if flag is already set */
4393         if (!!(tr->trace_flags & mask) == !!enabled)
4394                 return 0;
4395
4396         /* Give the tracer a chance to approve the change */
4397         if (tr->current_trace->flag_changed)
4398                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4399                         return -EINVAL;
4400
4401         if (enabled)
4402                 tr->trace_flags |= mask;
4403         else
4404                 tr->trace_flags &= ~mask;
4405
4406         if (mask == TRACE_ITER_RECORD_CMD)
4407                 trace_event_enable_cmd_record(enabled);
4408
4409         if (mask == TRACE_ITER_RECORD_TGID) {
4410                 if (!tgid_map) {
4411                         tgid_map_max = pid_max;
4412                         map = kzalloc((tgid_map_max + 1) * sizeof(*tgid_map),
4413                                       GFP_KERNEL);
4414
4415                         /*
4416                          * Pairs with smp_load_acquire() in
4417                          * trace_find_tgid_ptr() to ensure that if it observes
4418                          * the tgid_map we just allocated then it also observes
4419                          * the corresponding tgid_map_max value.
4420                          */
4421                         smp_store_release(&tgid_map, map);
4422                 }
4423                 if (!tgid_map) {
4424                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4425                         return -ENOMEM;
4426                 }
4427
4428                 trace_event_enable_tgid_record(enabled);
4429         }
4430
4431         if (mask == TRACE_ITER_EVENT_FORK)
4432                 trace_event_follow_fork(tr, enabled);
4433
4434         if (mask == TRACE_ITER_FUNC_FORK)
4435                 ftrace_pid_follow_fork(tr, enabled);
4436
4437         if (mask == TRACE_ITER_OVERWRITE) {
4438                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4439 #ifdef CONFIG_TRACER_MAX_TRACE
4440                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4441 #endif
4442         }
4443
4444         if (mask == TRACE_ITER_PRINTK) {
4445                 trace_printk_start_stop_comm(enabled);
4446                 trace_printk_control(enabled);
4447         }
4448
4449         return 0;
4450 }
4451
4452 static int trace_set_options(struct trace_array *tr, char *option)
4453 {
4454         char *cmp;
4455         int neg = 0;
4456         int ret = -ENODEV;
4457         int i;
4458         size_t orig_len = strlen(option);
4459
4460         cmp = strstrip(option);
4461
4462         if (strncmp(cmp, "no", 2) == 0) {
4463                 neg = 1;
4464                 cmp += 2;
4465         }
4466
4467         mutex_lock(&event_mutex);
4468         mutex_lock(&trace_types_lock);
4469
4470         for (i = 0; trace_options[i]; i++) {
4471                 if (strcmp(cmp, trace_options[i]) == 0) {
4472                         ret = set_tracer_flag(tr, 1 << i, !neg);
4473                         break;
4474                 }
4475         }
4476
4477         /* If no option could be set, test the specific tracer options */
4478         if (!trace_options[i])
4479                 ret = set_tracer_option(tr, cmp, neg);
4480
4481         mutex_unlock(&trace_types_lock);
4482         mutex_unlock(&event_mutex);
4483
4484         /*
4485          * If the first trailing whitespace is replaced with '\0' by strstrip,
4486          * turn it back into a space.
4487          */
4488         if (orig_len > strlen(option))
4489                 option[strlen(option)] = ' ';
4490
4491         return ret;
4492 }
4493
4494 static void __init apply_trace_boot_options(void)
4495 {
4496         char *buf = trace_boot_options_buf;
4497         char *option;
4498
4499         while (true) {
4500                 option = strsep(&buf, ",");
4501
4502                 if (!option)
4503                         break;
4504
4505                 if (*option)
4506                         trace_set_options(&global_trace, option);
4507
4508                 /* Put back the comma to allow this to be called again */
4509                 if (buf)
4510                         *(buf - 1) = ',';
4511         }
4512 }
4513
4514 static ssize_t
4515 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4516                         size_t cnt, loff_t *ppos)
4517 {
4518         struct seq_file *m = filp->private_data;
4519         struct trace_array *tr = m->private;
4520         char buf[64];
4521         int ret;
4522
4523         if (cnt >= sizeof(buf))
4524                 return -EINVAL;
4525
4526         if (copy_from_user(buf, ubuf, cnt))
4527                 return -EFAULT;
4528
4529         buf[cnt] = 0;
4530
4531         ret = trace_set_options(tr, buf);
4532         if (ret < 0)
4533                 return ret;
4534
4535         *ppos += cnt;
4536
4537         return cnt;
4538 }
4539
4540 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4541 {
4542         struct trace_array *tr = inode->i_private;
4543         int ret;
4544
4545         if (tracing_disabled)
4546                 return -ENODEV;
4547
4548         if (trace_array_get(tr) < 0)
4549                 return -ENODEV;
4550
4551         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4552         if (ret < 0)
4553                 trace_array_put(tr);
4554
4555         return ret;
4556 }
4557
4558 static const struct file_operations tracing_iter_fops = {
4559         .open           = tracing_trace_options_open,
4560         .read           = seq_read,
4561         .llseek         = seq_lseek,
4562         .release        = tracing_single_release_tr,
4563         .write          = tracing_trace_options_write,
4564 };
4565
4566 static const char readme_msg[] =
4567         "tracing mini-HOWTO:\n\n"
4568         "# echo 0 > tracing_on : quick way to disable tracing\n"
4569         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4570         " Important files:\n"
4571         "  trace\t\t\t- The static contents of the buffer\n"
4572         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4573         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4574         "  current_tracer\t- function and latency tracers\n"
4575         "  available_tracers\t- list of configured tracers for current_tracer\n"
4576         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4577         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4578         "  trace_clock\t\t-change the clock used to order events\n"
4579         "       local:   Per cpu clock but may not be synced across CPUs\n"
4580         "      global:   Synced across CPUs but slows tracing down.\n"
4581         "     counter:   Not a clock, but just an increment\n"
4582         "      uptime:   Jiffy counter from time of boot\n"
4583         "        perf:   Same clock that perf events use\n"
4584 #ifdef CONFIG_X86_64
4585         "     x86-tsc:   TSC cycle counter\n"
4586 #endif
4587         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4588         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4591         "\t\t\t  Remove sub-buffer with rmdir\n"
4592         "  trace_options\t\t- Set format or modify how tracing happens\n"
4593         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4594         "\t\t\t  option name\n"
4595         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597         "\n  available_filter_functions - list of functions that can be filtered on\n"
4598         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4599         "\t\t\t  functions\n"
4600         "\t     accepts: func_full_name or glob-matching-pattern\n"
4601         "\t     modules: Can select a group via module\n"
4602         "\t      Format: :mod:<module-name>\n"
4603         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4604         "\t    triggers: a command to perform when function is hit\n"
4605         "\t      Format: <function>:<trigger>[:count]\n"
4606         "\t     trigger: traceon, traceoff\n"
4607         "\t\t      enable_event:<system>:<event>\n"
4608         "\t\t      disable_event:<system>:<event>\n"
4609 #ifdef CONFIG_STACKTRACE
4610         "\t\t      stacktrace\n"
4611 #endif
4612 #ifdef CONFIG_TRACER_SNAPSHOT
4613         "\t\t      snapshot\n"
4614 #endif
4615         "\t\t      dump\n"
4616         "\t\t      cpudump\n"
4617         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4618         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4619         "\t     The first one will disable tracing every time do_fault is hit\n"
4620         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4621         "\t       The first time do trap is hit and it disables tracing, the\n"
4622         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4623         "\t       the counter will not decrement. It only decrements when the\n"
4624         "\t       trigger did work\n"
4625         "\t     To remove trigger without count:\n"
4626         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4627         "\t     To remove trigger with a count:\n"
4628         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4629         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4630         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4631         "\t    modules: Can select a group via module command :mod:\n"
4632         "\t    Does not accept triggers\n"
4633 #endif /* CONFIG_DYNAMIC_FTRACE */
4634 #ifdef CONFIG_FUNCTION_TRACER
4635         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4636         "\t\t    (function)\n"
4637 #endif
4638 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4639         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4640         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4641         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4642 #endif
4643 #ifdef CONFIG_TRACER_SNAPSHOT
4644         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4645         "\t\t\t  snapshot buffer. Read the contents for more\n"
4646         "\t\t\t  information\n"
4647 #endif
4648 #ifdef CONFIG_STACK_TRACER
4649         "  stack_trace\t\t- Shows the max stack trace when active\n"
4650         "  stack_max_size\t- Shows current max stack size that was traced\n"
4651         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4652         "\t\t\t  new trace)\n"
4653 #ifdef CONFIG_DYNAMIC_FTRACE
4654         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4655         "\t\t\t  traces\n"
4656 #endif
4657 #endif /* CONFIG_STACK_TRACER */
4658 #ifdef CONFIG_KPROBE_EVENTS
4659         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4660         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4661 #endif
4662 #ifdef CONFIG_UPROBE_EVENTS
4663         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4664         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4665 #endif
4666 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4667         "\t  accepts: event-definitions (one definition per line)\n"
4668         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4669         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4670         "\t           -:[<group>/]<event>\n"
4671 #ifdef CONFIG_KPROBE_EVENTS
4672         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4673   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4674 #endif
4675 #ifdef CONFIG_UPROBE_EVENTS
4676         "\t    place: <path>:<offset>\n"
4677 #endif
4678         "\t     args: <name>=fetcharg[:type]\n"
4679         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4680         "\t           $stack<index>, $stack, $retval, $comm\n"
4681         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4682         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4683 #endif
4684         "  events/\t\t- Directory containing all trace event subsystems:\n"
4685         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4686         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4687         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4688         "\t\t\t  events\n"
4689         "      filter\t\t- If set, only events passing filter are traced\n"
4690         "  events/<system>/<event>/\t- Directory containing control files for\n"
4691         "\t\t\t  <event>:\n"
4692         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4693         "      filter\t\t- If set, only events passing filter are traced\n"
4694         "      trigger\t\t- If set, a command to perform when event is hit\n"
4695         "\t    Format: <trigger>[:count][if <filter>]\n"
4696         "\t   trigger: traceon, traceoff\n"
4697         "\t            enable_event:<system>:<event>\n"
4698         "\t            disable_event:<system>:<event>\n"
4699 #ifdef CONFIG_HIST_TRIGGERS
4700         "\t            enable_hist:<system>:<event>\n"
4701         "\t            disable_hist:<system>:<event>\n"
4702 #endif
4703 #ifdef CONFIG_STACKTRACE
4704         "\t\t    stacktrace\n"
4705 #endif
4706 #ifdef CONFIG_TRACER_SNAPSHOT
4707         "\t\t    snapshot\n"
4708 #endif
4709 #ifdef CONFIG_HIST_TRIGGERS
4710         "\t\t    hist (see below)\n"
4711 #endif
4712         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4713         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4714         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4715         "\t                  events/block/block_unplug/trigger\n"
4716         "\t   The first disables tracing every time block_unplug is hit.\n"
4717         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4718         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4719         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4720         "\t   Like function triggers, the counter is only decremented if it\n"
4721         "\t    enabled or disabled tracing.\n"
4722         "\t   To remove a trigger without a count:\n"
4723         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4724         "\t   To remove a trigger with a count:\n"
4725         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4726         "\t   Filters can be ignored when removing a trigger.\n"
4727 #ifdef CONFIG_HIST_TRIGGERS
4728         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4729         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4730         "\t            [:values=<field1[,field2,...]>]\n"
4731         "\t            [:sort=<field1[,field2,...]>]\n"
4732         "\t            [:size=#entries]\n"
4733         "\t            [:pause][:continue][:clear]\n"
4734         "\t            [:name=histname1]\n"
4735         "\t            [if <filter>]\n\n"
4736         "\t    When a matching event is hit, an entry is added to a hash\n"
4737         "\t    table using the key(s) and value(s) named, and the value of a\n"
4738         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4739         "\t    correspond to fields in the event's format description.  Keys\n"
4740         "\t    can be any field, or the special string 'stacktrace'.\n"
4741         "\t    Compound keys consisting of up to two fields can be specified\n"
4742         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4743         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4744         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4745         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4746         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4747         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4748         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4749         "\t    its histogram data will be shared with other triggers of the\n"
4750         "\t    same name, and trigger hits will update this common data.\n\n"
4751         "\t    Reading the 'hist' file for the event will dump the hash\n"
4752         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4753         "\t    triggers attached to an event, there will be a table for each\n"
4754         "\t    trigger in the output.  The table displayed for a named\n"
4755         "\t    trigger will be the same as any other instance having the\n"
4756         "\t    same name.  The default format used to display a given field\n"
4757         "\t    can be modified by appending any of the following modifiers\n"
4758         "\t    to the field name, as applicable:\n\n"
4759         "\t            .hex        display a number as a hex value\n"
4760         "\t            .sym        display an address as a symbol\n"
4761         "\t            .sym-offset display an address as a symbol and offset\n"
4762         "\t            .execname   display a common_pid as a program name\n"
4763         "\t            .syscall    display a syscall id as a syscall name\n\n"
4764         "\t            .log2       display log2 value rather than raw number\n\n"
4765         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4766         "\t    trigger or to start a hist trigger but not log any events\n"
4767         "\t    until told to do so.  'continue' can be used to start or\n"
4768         "\t    restart a paused hist trigger.\n\n"
4769         "\t    The 'clear' parameter will clear the contents of a running\n"
4770         "\t    hist trigger and leave its current paused/active state\n"
4771         "\t    unchanged.\n\n"
4772         "\t    The enable_hist and disable_hist triggers can be used to\n"
4773         "\t    have one event conditionally start and stop another event's\n"
4774         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4775         "\t    the enable_event and disable_event triggers.\n"
4776 #endif
4777 ;
4778
4779 static ssize_t
4780 tracing_readme_read(struct file *filp, char __user *ubuf,
4781                        size_t cnt, loff_t *ppos)
4782 {
4783         return simple_read_from_buffer(ubuf, cnt, ppos,
4784                                         readme_msg, strlen(readme_msg));
4785 }
4786
4787 static const struct file_operations tracing_readme_fops = {
4788         .open           = tracing_open_generic,
4789         .read           = tracing_readme_read,
4790         .llseek         = generic_file_llseek,
4791 };
4792
4793 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4794 {
4795         int pid = ++(*pos);
4796
4797         return trace_find_tgid_ptr(pid);
4798 }
4799
4800 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4801 {
4802         int pid = *pos;
4803
4804         return trace_find_tgid_ptr(pid);
4805 }
4806
4807 static void saved_tgids_stop(struct seq_file *m, void *v)
4808 {
4809 }
4810
4811 static int saved_tgids_show(struct seq_file *m, void *v)
4812 {
4813         int *entry = (int *)v;
4814         int pid = entry - tgid_map;
4815         int tgid = *entry;
4816
4817         if (tgid == 0)
4818                 return SEQ_SKIP;
4819
4820         seq_printf(m, "%d %d\n", pid, tgid);
4821         return 0;
4822 }
4823
4824 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4825         .start          = saved_tgids_start,
4826         .stop           = saved_tgids_stop,
4827         .next           = saved_tgids_next,
4828         .show           = saved_tgids_show,
4829 };
4830
4831 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4832 {
4833         if (tracing_disabled)
4834                 return -ENODEV;
4835
4836         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4837 }
4838
4839
4840 static const struct file_operations tracing_saved_tgids_fops = {
4841         .open           = tracing_saved_tgids_open,
4842         .read           = seq_read,
4843         .llseek         = seq_lseek,
4844         .release        = seq_release,
4845 };
4846
4847 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4848 {
4849         unsigned int *ptr = v;
4850
4851         if (*pos || m->count)
4852                 ptr++;
4853
4854         (*pos)++;
4855
4856         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4857              ptr++) {
4858                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4859                         continue;
4860
4861                 return ptr;
4862         }
4863
4864         return NULL;
4865 }
4866
4867 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4868 {
4869         void *v;
4870         loff_t l = 0;
4871
4872         preempt_disable();
4873         arch_spin_lock(&trace_cmdline_lock);
4874
4875         v = &savedcmd->map_cmdline_to_pid[0];
4876         while (l <= *pos) {
4877                 v = saved_cmdlines_next(m, v, &l);
4878                 if (!v)
4879                         return NULL;
4880         }
4881
4882         return v;
4883 }
4884
4885 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4886 {
4887         arch_spin_unlock(&trace_cmdline_lock);
4888         preempt_enable();
4889 }
4890
4891 static int saved_cmdlines_show(struct seq_file *m, void *v)
4892 {
4893         char buf[TASK_COMM_LEN];
4894         unsigned int *pid = v;
4895
4896         __trace_find_cmdline(*pid, buf);
4897         seq_printf(m, "%d %s\n", *pid, buf);
4898         return 0;
4899 }
4900
4901 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4902         .start          = saved_cmdlines_start,
4903         .next           = saved_cmdlines_next,
4904         .stop           = saved_cmdlines_stop,
4905         .show           = saved_cmdlines_show,
4906 };
4907
4908 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4909 {
4910         if (tracing_disabled)
4911                 return -ENODEV;
4912
4913         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4914 }
4915
4916 static const struct file_operations tracing_saved_cmdlines_fops = {
4917         .open           = tracing_saved_cmdlines_open,
4918         .read           = seq_read,
4919         .llseek         = seq_lseek,
4920         .release        = seq_release,
4921 };
4922
4923 static ssize_t
4924 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4925                                  size_t cnt, loff_t *ppos)
4926 {
4927         char buf[64];
4928         int r;
4929
4930         arch_spin_lock(&trace_cmdline_lock);
4931         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4932         arch_spin_unlock(&trace_cmdline_lock);
4933
4934         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4935 }
4936
4937 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4938 {
4939         kfree(s->saved_cmdlines);
4940         kfree(s->map_cmdline_to_pid);
4941         kfree(s);
4942 }
4943
4944 static int tracing_resize_saved_cmdlines(unsigned int val)
4945 {
4946         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4947
4948         s = kmalloc(sizeof(*s), GFP_KERNEL);
4949         if (!s)
4950                 return -ENOMEM;
4951
4952         if (allocate_cmdlines_buffer(val, s) < 0) {
4953                 kfree(s);
4954                 return -ENOMEM;
4955         }
4956
4957         arch_spin_lock(&trace_cmdline_lock);
4958         savedcmd_temp = savedcmd;
4959         savedcmd = s;
4960         arch_spin_unlock(&trace_cmdline_lock);
4961         free_saved_cmdlines_buffer(savedcmd_temp);
4962
4963         return 0;
4964 }
4965
4966 static ssize_t
4967 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4968                                   size_t cnt, loff_t *ppos)
4969 {
4970         unsigned long val;
4971         int ret;
4972
4973         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4974         if (ret)
4975                 return ret;
4976
4977         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4978         if (!val || val > PID_MAX_DEFAULT)
4979                 return -EINVAL;
4980
4981         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4982         if (ret < 0)
4983                 return ret;
4984
4985         *ppos += cnt;
4986
4987         return cnt;
4988 }
4989
4990 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4991         .open           = tracing_open_generic,
4992         .read           = tracing_saved_cmdlines_size_read,
4993         .write          = tracing_saved_cmdlines_size_write,
4994 };
4995
4996 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4997 static union trace_eval_map_item *
4998 update_eval_map(union trace_eval_map_item *ptr)
4999 {
5000         if (!ptr->map.eval_string) {
5001                 if (ptr->tail.next) {
5002                         ptr = ptr->tail.next;
5003                         /* Set ptr to the next real item (skip head) */
5004                         ptr++;
5005                 } else
5006                         return NULL;
5007         }
5008         return ptr;
5009 }
5010
5011 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5012 {
5013         union trace_eval_map_item *ptr = v;
5014
5015         /*
5016          * Paranoid! If ptr points to end, we don't want to increment past it.
5017          * This really should never happen.
5018          */
5019         ptr = update_eval_map(ptr);
5020         if (WARN_ON_ONCE(!ptr))
5021                 return NULL;
5022
5023         ptr++;
5024
5025         (*pos)++;
5026
5027         ptr = update_eval_map(ptr);
5028
5029         return ptr;
5030 }
5031
5032 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5033 {
5034         union trace_eval_map_item *v;
5035         loff_t l = 0;
5036
5037         mutex_lock(&trace_eval_mutex);
5038
5039         v = trace_eval_maps;
5040         if (v)
5041                 v++;
5042
5043         while (v && l < *pos) {
5044                 v = eval_map_next(m, v, &l);
5045         }
5046
5047         return v;
5048 }
5049
5050 static void eval_map_stop(struct seq_file *m, void *v)
5051 {
5052         mutex_unlock(&trace_eval_mutex);
5053 }
5054
5055 static int eval_map_show(struct seq_file *m, void *v)
5056 {
5057         union trace_eval_map_item *ptr = v;
5058
5059         seq_printf(m, "%s %ld (%s)\n",
5060                    ptr->map.eval_string, ptr->map.eval_value,
5061                    ptr->map.system);
5062
5063         return 0;
5064 }
5065
5066 static const struct seq_operations tracing_eval_map_seq_ops = {
5067         .start          = eval_map_start,
5068         .next           = eval_map_next,
5069         .stop           = eval_map_stop,
5070         .show           = eval_map_show,
5071 };
5072
5073 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5074 {
5075         if (tracing_disabled)
5076                 return -ENODEV;
5077
5078         return seq_open(filp, &tracing_eval_map_seq_ops);
5079 }
5080
5081 static const struct file_operations tracing_eval_map_fops = {
5082         .open           = tracing_eval_map_open,
5083         .read           = seq_read,
5084         .llseek         = seq_lseek,
5085         .release        = seq_release,
5086 };
5087
5088 static inline union trace_eval_map_item *
5089 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5090 {
5091         /* Return tail of array given the head */
5092         return ptr + ptr->head.length + 1;
5093 }
5094
5095 static void
5096 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5097                            int len)
5098 {
5099         struct trace_eval_map **stop;
5100         struct trace_eval_map **map;
5101         union trace_eval_map_item *map_array;
5102         union trace_eval_map_item *ptr;
5103
5104         stop = start + len;
5105
5106         /*
5107          * The trace_eval_maps contains the map plus a head and tail item,
5108          * where the head holds the module and length of array, and the
5109          * tail holds a pointer to the next list.
5110          */
5111         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5112         if (!map_array) {
5113                 pr_warn("Unable to allocate trace eval mapping\n");
5114                 return;
5115         }
5116
5117         mutex_lock(&trace_eval_mutex);
5118
5119         if (!trace_eval_maps)
5120                 trace_eval_maps = map_array;
5121         else {
5122                 ptr = trace_eval_maps;
5123                 for (;;) {
5124                         ptr = trace_eval_jmp_to_tail(ptr);
5125                         if (!ptr->tail.next)
5126                                 break;
5127                         ptr = ptr->tail.next;
5128
5129                 }
5130                 ptr->tail.next = map_array;
5131         }
5132         map_array->head.mod = mod;
5133         map_array->head.length = len;
5134         map_array++;
5135
5136         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5137                 map_array->map = **map;
5138                 map_array++;
5139         }
5140         memset(map_array, 0, sizeof(*map_array));
5141
5142         mutex_unlock(&trace_eval_mutex);
5143 }
5144
5145 static void trace_create_eval_file(struct dentry *d_tracer)
5146 {
5147         trace_create_file("eval_map", 0444, d_tracer,
5148                           NULL, &tracing_eval_map_fops);
5149 }
5150
5151 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5152 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5153 static inline void trace_insert_eval_map_file(struct module *mod,
5154                               struct trace_eval_map **start, int len) { }
5155 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5156
5157 static void trace_insert_eval_map(struct module *mod,
5158                                   struct trace_eval_map **start, int len)
5159 {
5160         struct trace_eval_map **map;
5161
5162         if (len <= 0)
5163                 return;
5164
5165         map = start;
5166
5167         trace_event_eval_update(map, len);
5168
5169         trace_insert_eval_map_file(mod, start, len);
5170 }
5171
5172 static ssize_t
5173 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5174                        size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_array *tr = filp->private_data;
5177         char buf[MAX_TRACER_SIZE+2];
5178         int r;
5179
5180         mutex_lock(&trace_types_lock);
5181         r = sprintf(buf, "%s\n", tr->current_trace->name);
5182         mutex_unlock(&trace_types_lock);
5183
5184         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5185 }
5186
5187 int tracer_init(struct tracer *t, struct trace_array *tr)
5188 {
5189         tracing_reset_online_cpus(&tr->trace_buffer);
5190         return t->init(tr);
5191 }
5192
5193 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5194 {
5195         int cpu;
5196
5197         for_each_tracing_cpu(cpu)
5198                 per_cpu_ptr(buf->data, cpu)->entries = val;
5199 }
5200
5201 #ifdef CONFIG_TRACER_MAX_TRACE
5202 /* resize @tr's buffer to the size of @size_tr's entries */
5203 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5204                                         struct trace_buffer *size_buf, int cpu_id)
5205 {
5206         int cpu, ret = 0;
5207
5208         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5209                 for_each_tracing_cpu(cpu) {
5210                         ret = ring_buffer_resize(trace_buf->buffer,
5211                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5212                         if (ret < 0)
5213                                 break;
5214                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5215                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5216                 }
5217         } else {
5218                 ret = ring_buffer_resize(trace_buf->buffer,
5219                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5220                 if (ret == 0)
5221                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5222                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5223         }
5224
5225         return ret;
5226 }
5227 #endif /* CONFIG_TRACER_MAX_TRACE */
5228
5229 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5230                                         unsigned long size, int cpu)
5231 {
5232         int ret;
5233
5234         /*
5235          * If kernel or user changes the size of the ring buffer
5236          * we use the size that was given, and we can forget about
5237          * expanding it later.
5238          */
5239         ring_buffer_expanded = true;
5240
5241         /* May be called before buffers are initialized */
5242         if (!tr->trace_buffer.buffer)
5243                 return 0;
5244
5245         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5246         if (ret < 0)
5247                 return ret;
5248
5249 #ifdef CONFIG_TRACER_MAX_TRACE
5250         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5251             !tr->current_trace->use_max_tr)
5252                 goto out;
5253
5254         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5255         if (ret < 0) {
5256                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5257                                                      &tr->trace_buffer, cpu);
5258                 if (r < 0) {
5259                         /*
5260                          * AARGH! We are left with different
5261                          * size max buffer!!!!
5262                          * The max buffer is our "snapshot" buffer.
5263                          * When a tracer needs a snapshot (one of the
5264                          * latency tracers), it swaps the max buffer
5265                          * with the saved snap shot. We succeeded to
5266                          * update the size of the main buffer, but failed to
5267                          * update the size of the max buffer. But when we tried
5268                          * to reset the main buffer to the original size, we
5269                          * failed there too. This is very unlikely to
5270                          * happen, but if it does, warn and kill all
5271                          * tracing.
5272                          */
5273                         WARN_ON(1);
5274                         tracing_disabled = 1;
5275                 }
5276                 return ret;
5277         }
5278
5279         if (cpu == RING_BUFFER_ALL_CPUS)
5280                 set_buffer_entries(&tr->max_buffer, size);
5281         else
5282                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5283
5284  out:
5285 #endif /* CONFIG_TRACER_MAX_TRACE */
5286
5287         if (cpu == RING_BUFFER_ALL_CPUS)
5288                 set_buffer_entries(&tr->trace_buffer, size);
5289         else
5290                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5291
5292         return ret;
5293 }
5294
5295 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5296                                           unsigned long size, int cpu_id)
5297 {
5298         int ret = size;
5299
5300         mutex_lock(&trace_types_lock);
5301
5302         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5303                 /* make sure, this cpu is enabled in the mask */
5304                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5305                         ret = -EINVAL;
5306                         goto out;
5307                 }
5308         }
5309
5310         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5311         if (ret < 0)
5312                 ret = -ENOMEM;
5313
5314 out:
5315         mutex_unlock(&trace_types_lock);
5316
5317         return ret;
5318 }
5319
5320
5321 /**
5322  * tracing_update_buffers - used by tracing facility to expand ring buffers
5323  *
5324  * To save on memory when the tracing is never used on a system with it
5325  * configured in. The ring buffers are set to a minimum size. But once
5326  * a user starts to use the tracing facility, then they need to grow
5327  * to their default size.
5328  *
5329  * This function is to be called when a tracer is about to be used.
5330  */
5331 int tracing_update_buffers(void)
5332 {
5333         int ret = 0;
5334
5335         mutex_lock(&trace_types_lock);
5336         if (!ring_buffer_expanded)
5337                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5338                                                 RING_BUFFER_ALL_CPUS);
5339         mutex_unlock(&trace_types_lock);
5340
5341         return ret;
5342 }
5343
5344 struct trace_option_dentry;
5345
5346 static void
5347 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5348
5349 /*
5350  * Used to clear out the tracer before deletion of an instance.
5351  * Must have trace_types_lock held.
5352  */
5353 static void tracing_set_nop(struct trace_array *tr)
5354 {
5355         if (tr->current_trace == &nop_trace)
5356                 return;
5357         
5358         tr->current_trace->enabled--;
5359
5360         if (tr->current_trace->reset)
5361                 tr->current_trace->reset(tr);
5362
5363         tr->current_trace = &nop_trace;
5364 }
5365
5366 static bool tracer_options_updated;
5367
5368 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5369 {
5370         /* Only enable if the directory has been created already. */
5371         if (!tr->dir)
5372                 return;
5373
5374         /* Only create trace option files after update_tracer_options finish */
5375         if (!tracer_options_updated)
5376                 return;
5377
5378         create_trace_option_files(tr, t);
5379 }
5380
5381 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5382 {
5383         struct tracer *t;
5384 #ifdef CONFIG_TRACER_MAX_TRACE
5385         bool had_max_tr;
5386 #endif
5387         int ret = 0;
5388
5389         mutex_lock(&trace_types_lock);
5390
5391         if (!ring_buffer_expanded) {
5392                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5393                                                 RING_BUFFER_ALL_CPUS);
5394                 if (ret < 0)
5395                         goto out;
5396                 ret = 0;
5397         }
5398
5399         for (t = trace_types; t; t = t->next) {
5400                 if (strcmp(t->name, buf) == 0)
5401                         break;
5402         }
5403         if (!t) {
5404                 ret = -EINVAL;
5405                 goto out;
5406         }
5407         if (t == tr->current_trace)
5408                 goto out;
5409
5410         /* Some tracers won't work on kernel command line */
5411         if (system_state < SYSTEM_RUNNING && t->noboot) {
5412                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5413                         t->name);
5414                 goto out;
5415         }
5416
5417         /* Some tracers are only allowed for the top level buffer */
5418         if (!trace_ok_for_array(t, tr)) {
5419                 ret = -EINVAL;
5420                 goto out;
5421         }
5422
5423         /* If trace pipe files are being read, we can't change the tracer */
5424         if (tr->current_trace->ref) {
5425                 ret = -EBUSY;
5426                 goto out;
5427         }
5428
5429         trace_branch_disable();
5430
5431         tr->current_trace->enabled--;
5432
5433         if (tr->current_trace->reset)
5434                 tr->current_trace->reset(tr);
5435
5436         /* Current trace needs to be nop_trace before synchronize_sched */
5437         tr->current_trace = &nop_trace;
5438
5439 #ifdef CONFIG_TRACER_MAX_TRACE
5440         had_max_tr = tr->allocated_snapshot;
5441
5442         if (had_max_tr && !t->use_max_tr) {
5443                 /*
5444                  * We need to make sure that the update_max_tr sees that
5445                  * current_trace changed to nop_trace to keep it from
5446                  * swapping the buffers after we resize it.
5447                  * The update_max_tr is called from interrupts disabled
5448                  * so a synchronized_sched() is sufficient.
5449                  */
5450                 synchronize_sched();
5451                 free_snapshot(tr);
5452         }
5453 #endif
5454
5455 #ifdef CONFIG_TRACER_MAX_TRACE
5456         if (t->use_max_tr && !had_max_tr) {
5457                 ret = tracing_alloc_snapshot_instance(tr);
5458                 if (ret < 0)
5459                         goto out;
5460         }
5461 #endif
5462
5463         if (t->init) {
5464                 ret = tracer_init(t, tr);
5465                 if (ret)
5466                         goto out;
5467         }
5468
5469         tr->current_trace = t;
5470         tr->current_trace->enabled++;
5471         trace_branch_enable(tr);
5472  out:
5473         mutex_unlock(&trace_types_lock);
5474
5475         return ret;
5476 }
5477
5478 static ssize_t
5479 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5480                         size_t cnt, loff_t *ppos)
5481 {
5482         struct trace_array *tr = filp->private_data;
5483         char buf[MAX_TRACER_SIZE+1];
5484         int i;
5485         size_t ret;
5486         int err;
5487
5488         ret = cnt;
5489
5490         if (cnt > MAX_TRACER_SIZE)
5491                 cnt = MAX_TRACER_SIZE;
5492
5493         if (copy_from_user(buf, ubuf, cnt))
5494                 return -EFAULT;
5495
5496         buf[cnt] = 0;
5497
5498         /* strip ending whitespace. */
5499         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5500                 buf[i] = 0;
5501
5502         err = tracing_set_tracer(tr, buf);
5503         if (err)
5504                 return err;
5505
5506         *ppos += ret;
5507
5508         return ret;
5509 }
5510
5511 static ssize_t
5512 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5513                    size_t cnt, loff_t *ppos)
5514 {
5515         char buf[64];
5516         int r;
5517
5518         r = snprintf(buf, sizeof(buf), "%ld\n",
5519                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5520         if (r > sizeof(buf))
5521                 r = sizeof(buf);
5522         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5523 }
5524
5525 static ssize_t
5526 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5527                     size_t cnt, loff_t *ppos)
5528 {
5529         unsigned long val;
5530         int ret;
5531
5532         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5533         if (ret)
5534                 return ret;
5535
5536         *ptr = val * 1000;
5537
5538         return cnt;
5539 }
5540
5541 static ssize_t
5542 tracing_thresh_read(struct file *filp, char __user *ubuf,
5543                     size_t cnt, loff_t *ppos)
5544 {
5545         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5546 }
5547
5548 static ssize_t
5549 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5550                      size_t cnt, loff_t *ppos)
5551 {
5552         struct trace_array *tr = filp->private_data;
5553         int ret;
5554
5555         mutex_lock(&trace_types_lock);
5556         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5557         if (ret < 0)
5558                 goto out;
5559
5560         if (tr->current_trace->update_thresh) {
5561                 ret = tr->current_trace->update_thresh(tr);
5562                 if (ret < 0)
5563                         goto out;
5564         }
5565
5566         ret = cnt;
5567 out:
5568         mutex_unlock(&trace_types_lock);
5569
5570         return ret;
5571 }
5572
5573 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5574
5575 static ssize_t
5576 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5577                      size_t cnt, loff_t *ppos)
5578 {
5579         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5580 }
5581
5582 static ssize_t
5583 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5584                       size_t cnt, loff_t *ppos)
5585 {
5586         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5587 }
5588
5589 #endif
5590
5591 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5592 {
5593         struct trace_array *tr = inode->i_private;
5594         struct trace_iterator *iter;
5595         int ret = 0;
5596
5597         if (tracing_disabled)
5598                 return -ENODEV;
5599
5600         if (trace_array_get(tr) < 0)
5601                 return -ENODEV;
5602
5603         mutex_lock(&trace_types_lock);
5604
5605         /* create a buffer to store the information to pass to userspace */
5606         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5607         if (!iter) {
5608                 ret = -ENOMEM;
5609                 __trace_array_put(tr);
5610                 goto out;
5611         }
5612
5613         trace_seq_init(&iter->seq);
5614         iter->trace = tr->current_trace;
5615
5616         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5617                 ret = -ENOMEM;
5618                 goto fail;
5619         }
5620
5621         /* trace pipe does not show start of buffer */
5622         cpumask_setall(iter->started);
5623
5624         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5625                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5626
5627         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5628         if (trace_clocks[tr->clock_id].in_ns)
5629                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5630
5631         iter->tr = tr;
5632         iter->trace_buffer = &tr->trace_buffer;
5633         iter->cpu_file = tracing_get_cpu(inode);
5634         mutex_init(&iter->mutex);
5635         filp->private_data = iter;
5636
5637         if (iter->trace->pipe_open)
5638                 iter->trace->pipe_open(iter);
5639
5640         nonseekable_open(inode, filp);
5641
5642         tr->current_trace->ref++;
5643 out:
5644         mutex_unlock(&trace_types_lock);
5645         return ret;
5646
5647 fail:
5648         kfree(iter);
5649         __trace_array_put(tr);
5650         mutex_unlock(&trace_types_lock);
5651         return ret;
5652 }
5653
5654 static int tracing_release_pipe(struct inode *inode, struct file *file)
5655 {
5656         struct trace_iterator *iter = file->private_data;
5657         struct trace_array *tr = inode->i_private;
5658
5659         mutex_lock(&trace_types_lock);
5660
5661         tr->current_trace->ref--;
5662
5663         if (iter->trace->pipe_close)
5664                 iter->trace->pipe_close(iter);
5665
5666         mutex_unlock(&trace_types_lock);
5667
5668         free_cpumask_var(iter->started);
5669         mutex_destroy(&iter->mutex);
5670         kfree(iter);
5671
5672         trace_array_put(tr);
5673
5674         return 0;
5675 }
5676
5677 static unsigned int
5678 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5679 {
5680         struct trace_array *tr = iter->tr;
5681
5682         /* Iterators are static, they should be filled or empty */
5683         if (trace_buffer_iter(iter, iter->cpu_file))
5684                 return POLLIN | POLLRDNORM;
5685
5686         if (tr->trace_flags & TRACE_ITER_BLOCK)
5687                 /*
5688                  * Always select as readable when in blocking mode
5689                  */
5690                 return POLLIN | POLLRDNORM;
5691         else
5692                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5693                                              filp, poll_table);
5694 }
5695
5696 static unsigned int
5697 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5698 {
5699         struct trace_iterator *iter = filp->private_data;
5700
5701         return trace_poll(iter, filp, poll_table);
5702 }
5703
5704 /* Must be called with iter->mutex held. */
5705 static int tracing_wait_pipe(struct file *filp)
5706 {
5707         struct trace_iterator *iter = filp->private_data;
5708         int ret;
5709
5710         while (trace_empty(iter)) {
5711
5712                 if ((filp->f_flags & O_NONBLOCK)) {
5713                         return -EAGAIN;
5714                 }
5715
5716                 /*
5717                  * We block until we read something and tracing is disabled.
5718                  * We still block if tracing is disabled, but we have never
5719                  * read anything. This allows a user to cat this file, and
5720                  * then enable tracing. But after we have read something,
5721                  * we give an EOF when tracing is again disabled.
5722                  *
5723                  * iter->pos will be 0 if we haven't read anything.
5724                  */
5725                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5726                         break;
5727
5728                 mutex_unlock(&iter->mutex);
5729
5730                 ret = wait_on_pipe(iter, false);
5731
5732                 mutex_lock(&iter->mutex);
5733
5734                 if (ret)
5735                         return ret;
5736         }
5737
5738         return 1;
5739 }
5740
5741 /*
5742  * Consumer reader.
5743  */
5744 static ssize_t
5745 tracing_read_pipe(struct file *filp, char __user *ubuf,
5746                   size_t cnt, loff_t *ppos)
5747 {
5748         struct trace_iterator *iter = filp->private_data;
5749         ssize_t sret;
5750
5751         /*
5752          * Avoid more than one consumer on a single file descriptor
5753          * This is just a matter of traces coherency, the ring buffer itself
5754          * is protected.
5755          */
5756         mutex_lock(&iter->mutex);
5757
5758         /* return any leftover data */
5759         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5760         if (sret != -EBUSY)
5761                 goto out;
5762
5763         trace_seq_init(&iter->seq);
5764
5765         if (iter->trace->read) {
5766                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5767                 if (sret)
5768                         goto out;
5769         }
5770
5771 waitagain:
5772         sret = tracing_wait_pipe(filp);
5773         if (sret <= 0)
5774                 goto out;
5775
5776         /* stop when tracing is finished */
5777         if (trace_empty(iter)) {
5778                 sret = 0;
5779                 goto out;
5780         }
5781
5782         if (cnt >= PAGE_SIZE)
5783                 cnt = PAGE_SIZE - 1;
5784
5785         /* reset all but tr, trace, and overruns */
5786         memset(&iter->seq, 0,
5787                sizeof(struct trace_iterator) -
5788                offsetof(struct trace_iterator, seq));
5789         cpumask_clear(iter->started);
5790         trace_seq_init(&iter->seq);
5791         iter->pos = -1;
5792
5793         trace_event_read_lock();
5794         trace_access_lock(iter->cpu_file);
5795         while (trace_find_next_entry_inc(iter) != NULL) {
5796                 enum print_line_t ret;
5797                 int save_len = iter->seq.seq.len;
5798
5799                 ret = print_trace_line(iter);
5800                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5801                         /* don't print partial lines */
5802                         iter->seq.seq.len = save_len;
5803                         break;
5804                 }
5805                 if (ret != TRACE_TYPE_NO_CONSUME)
5806                         trace_consume(iter);
5807
5808                 if (trace_seq_used(&iter->seq) >= cnt)
5809                         break;
5810
5811                 /*
5812                  * Setting the full flag means we reached the trace_seq buffer
5813                  * size and we should leave by partial output condition above.
5814                  * One of the trace_seq_* functions is not used properly.
5815                  */
5816                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5817                           iter->ent->type);
5818         }
5819         trace_access_unlock(iter->cpu_file);
5820         trace_event_read_unlock();
5821
5822         /* Now copy what we have to the user */
5823         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5824         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5825                 trace_seq_init(&iter->seq);
5826
5827         /*
5828          * If there was nothing to send to user, in spite of consuming trace
5829          * entries, go back to wait for more entries.
5830          */
5831         if (sret == -EBUSY)
5832                 goto waitagain;
5833
5834 out:
5835         mutex_unlock(&iter->mutex);
5836
5837         return sret;
5838 }
5839
5840 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5841                                      unsigned int idx)
5842 {
5843         __free_page(spd->pages[idx]);
5844 }
5845
5846 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5847         .can_merge              = 0,
5848         .confirm                = generic_pipe_buf_confirm,
5849         .release                = generic_pipe_buf_release,
5850         .steal                  = generic_pipe_buf_steal,
5851         .get                    = generic_pipe_buf_get,
5852 };
5853
5854 static size_t
5855 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5856 {
5857         size_t count;
5858         int save_len;
5859         int ret;
5860
5861         /* Seq buffer is page-sized, exactly what we need. */
5862         for (;;) {
5863                 save_len = iter->seq.seq.len;
5864                 ret = print_trace_line(iter);
5865
5866                 if (trace_seq_has_overflowed(&iter->seq)) {
5867                         iter->seq.seq.len = save_len;
5868                         break;
5869                 }
5870
5871                 /*
5872                  * This should not be hit, because it should only
5873                  * be set if the iter->seq overflowed. But check it
5874                  * anyway to be safe.
5875                  */
5876                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5877                         iter->seq.seq.len = save_len;
5878                         break;
5879                 }
5880
5881                 count = trace_seq_used(&iter->seq) - save_len;
5882                 if (rem < count) {
5883                         rem = 0;
5884                         iter->seq.seq.len = save_len;
5885                         break;
5886                 }
5887
5888                 if (ret != TRACE_TYPE_NO_CONSUME)
5889                         trace_consume(iter);
5890                 rem -= count;
5891                 if (!trace_find_next_entry_inc(iter))   {
5892                         rem = 0;
5893                         iter->ent = NULL;
5894                         break;
5895                 }
5896         }
5897
5898         return rem;
5899 }
5900
5901 static ssize_t tracing_splice_read_pipe(struct file *filp,
5902                                         loff_t *ppos,
5903                                         struct pipe_inode_info *pipe,
5904                                         size_t len,
5905                                         unsigned int flags)
5906 {
5907         struct page *pages_def[PIPE_DEF_BUFFERS];
5908         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5909         struct trace_iterator *iter = filp->private_data;
5910         struct splice_pipe_desc spd = {
5911                 .pages          = pages_def,
5912                 .partial        = partial_def,
5913                 .nr_pages       = 0, /* This gets updated below. */
5914                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5915                 .ops            = &tracing_pipe_buf_ops,
5916                 .spd_release    = tracing_spd_release_pipe,
5917         };
5918         ssize_t ret;
5919         size_t rem;
5920         unsigned int i;
5921
5922         if (splice_grow_spd(pipe, &spd))
5923                 return -ENOMEM;
5924
5925         mutex_lock(&iter->mutex);
5926
5927         if (iter->trace->splice_read) {
5928                 ret = iter->trace->splice_read(iter, filp,
5929                                                ppos, pipe, len, flags);
5930                 if (ret)
5931                         goto out_err;
5932         }
5933
5934         ret = tracing_wait_pipe(filp);
5935         if (ret <= 0)
5936                 goto out_err;
5937
5938         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5939                 ret = -EFAULT;
5940                 goto out_err;
5941         }
5942
5943         trace_event_read_lock();
5944         trace_access_lock(iter->cpu_file);
5945
5946         /* Fill as many pages as possible. */
5947         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5948                 spd.pages[i] = alloc_page(GFP_KERNEL);
5949                 if (!spd.pages[i])
5950                         break;
5951
5952                 rem = tracing_fill_pipe_page(rem, iter);
5953
5954                 /* Copy the data into the page, so we can start over. */
5955                 ret = trace_seq_to_buffer(&iter->seq,
5956                                           page_address(spd.pages[i]),
5957                                           trace_seq_used(&iter->seq));
5958                 if (ret < 0) {
5959                         __free_page(spd.pages[i]);
5960                         break;
5961                 }
5962                 spd.partial[i].offset = 0;
5963                 spd.partial[i].len = trace_seq_used(&iter->seq);
5964
5965                 trace_seq_init(&iter->seq);
5966         }
5967
5968         trace_access_unlock(iter->cpu_file);
5969         trace_event_read_unlock();
5970         mutex_unlock(&iter->mutex);
5971
5972         spd.nr_pages = i;
5973
5974         if (i)
5975                 ret = splice_to_pipe(pipe, &spd);
5976         else
5977                 ret = 0;
5978 out:
5979         splice_shrink_spd(&spd);
5980         return ret;
5981
5982 out_err:
5983         mutex_unlock(&iter->mutex);
5984         goto out;
5985 }
5986
5987 static ssize_t
5988 tracing_entries_read(struct file *filp, char __user *ubuf,
5989                      size_t cnt, loff_t *ppos)
5990 {
5991         struct inode *inode = file_inode(filp);
5992         struct trace_array *tr = inode->i_private;
5993         int cpu = tracing_get_cpu(inode);
5994         char buf[64];
5995         int r = 0;
5996         ssize_t ret;
5997
5998         mutex_lock(&trace_types_lock);
5999
6000         if (cpu == RING_BUFFER_ALL_CPUS) {
6001                 int cpu, buf_size_same;
6002                 unsigned long size;
6003
6004                 size = 0;
6005                 buf_size_same = 1;
6006                 /* check if all cpu sizes are same */
6007                 for_each_tracing_cpu(cpu) {
6008                         /* fill in the size from first enabled cpu */
6009                         if (size == 0)
6010                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6011                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6012                                 buf_size_same = 0;
6013                                 break;
6014                         }
6015                 }
6016
6017                 if (buf_size_same) {
6018                         if (!ring_buffer_expanded)
6019                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6020                                             size >> 10,
6021                                             trace_buf_size >> 10);
6022                         else
6023                                 r = sprintf(buf, "%lu\n", size >> 10);
6024                 } else
6025                         r = sprintf(buf, "X\n");
6026         } else
6027                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6028
6029         mutex_unlock(&trace_types_lock);
6030
6031         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6032         return ret;
6033 }
6034
6035 static ssize_t
6036 tracing_entries_write(struct file *filp, const char __user *ubuf,
6037                       size_t cnt, loff_t *ppos)
6038 {
6039         struct inode *inode = file_inode(filp);
6040         struct trace_array *tr = inode->i_private;
6041         unsigned long val;
6042         int ret;
6043
6044         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6045         if (ret)
6046                 return ret;
6047
6048         /* must have at least 1 entry */
6049         if (!val)
6050                 return -EINVAL;
6051
6052         /* value is in KB */
6053         val <<= 10;
6054         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6055         if (ret < 0)
6056                 return ret;
6057
6058         *ppos += cnt;
6059
6060         return cnt;
6061 }
6062
6063 static ssize_t
6064 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6065                                 size_t cnt, loff_t *ppos)
6066 {
6067         struct trace_array *tr = filp->private_data;
6068         char buf[64];
6069         int r, cpu;
6070         unsigned long size = 0, expanded_size = 0;
6071
6072         mutex_lock(&trace_types_lock);
6073         for_each_tracing_cpu(cpu) {
6074                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6075                 if (!ring_buffer_expanded)
6076                         expanded_size += trace_buf_size >> 10;
6077         }
6078         if (ring_buffer_expanded)
6079                 r = sprintf(buf, "%lu\n", size);
6080         else
6081                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6082         mutex_unlock(&trace_types_lock);
6083
6084         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6085 }
6086
6087 static ssize_t
6088 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6089                           size_t cnt, loff_t *ppos)
6090 {
6091         /*
6092          * There is no need to read what the user has written, this function
6093          * is just to make sure that there is no error when "echo" is used
6094          */
6095
6096         *ppos += cnt;
6097
6098         return cnt;
6099 }
6100
6101 static int
6102 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6103 {
6104         struct trace_array *tr = inode->i_private;
6105
6106         /* disable tracing ? */
6107         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6108                 tracer_tracing_off(tr);
6109         /* resize the ring buffer to 0 */
6110         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6111
6112         trace_array_put(tr);
6113
6114         return 0;
6115 }
6116
6117 static ssize_t
6118 tracing_mark_write(struct file *filp, const char __user *ubuf,
6119                                         size_t cnt, loff_t *fpos)
6120 {
6121         struct trace_array *tr = filp->private_data;
6122         struct ring_buffer_event *event;
6123         struct ring_buffer *buffer;
6124         struct print_entry *entry;
6125         unsigned long irq_flags;
6126         const char faulted[] = "<faulted>";
6127         ssize_t written;
6128         int size;
6129         int len;
6130
6131 /* Used in tracing_mark_raw_write() as well */
6132 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6133
6134         if (tracing_disabled)
6135                 return -EINVAL;
6136
6137         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6138                 return -EINVAL;
6139
6140         if (cnt > TRACE_BUF_SIZE)
6141                 cnt = TRACE_BUF_SIZE;
6142
6143         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6144
6145         local_save_flags(irq_flags);
6146         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6147
6148         /* If less than "<faulted>", then make sure we can still add that */
6149         if (cnt < FAULTED_SIZE)
6150                 size += FAULTED_SIZE - cnt;
6151
6152         buffer = tr->trace_buffer.buffer;
6153         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6154                                             irq_flags, preempt_count());
6155         if (unlikely(!event))
6156                 /* Ring buffer disabled, return as if not open for write */
6157                 return -EBADF;
6158
6159         entry = ring_buffer_event_data(event);
6160         entry->ip = _THIS_IP_;
6161
6162         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6163         if (len) {
6164                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6165                 cnt = FAULTED_SIZE;
6166                 written = -EFAULT;
6167         } else
6168                 written = cnt;
6169         len = cnt;
6170
6171         if (entry->buf[cnt - 1] != '\n') {
6172                 entry->buf[cnt] = '\n';
6173                 entry->buf[cnt + 1] = '\0';
6174         } else
6175                 entry->buf[cnt] = '\0';
6176
6177         __buffer_unlock_commit(buffer, event);
6178
6179         if (written > 0)
6180                 *fpos += written;
6181
6182         return written;
6183 }
6184
6185 /* Limit it for now to 3K (including tag) */
6186 #define RAW_DATA_MAX_SIZE (1024*3)
6187
6188 static ssize_t
6189 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6190                                         size_t cnt, loff_t *fpos)
6191 {
6192         struct trace_array *tr = filp->private_data;
6193         struct ring_buffer_event *event;
6194         struct ring_buffer *buffer;
6195         struct raw_data_entry *entry;
6196         const char faulted[] = "<faulted>";
6197         unsigned long irq_flags;
6198         ssize_t written;
6199         int size;
6200         int len;
6201
6202 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6203
6204         if (tracing_disabled)
6205                 return -EINVAL;
6206
6207         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6208                 return -EINVAL;
6209
6210         /* The marker must at least have a tag id */
6211         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6212                 return -EINVAL;
6213
6214         if (cnt > TRACE_BUF_SIZE)
6215                 cnt = TRACE_BUF_SIZE;
6216
6217         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6218
6219         local_save_flags(irq_flags);
6220         size = sizeof(*entry) + cnt;
6221         if (cnt < FAULT_SIZE_ID)
6222                 size += FAULT_SIZE_ID - cnt;
6223
6224         buffer = tr->trace_buffer.buffer;
6225         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6226                                             irq_flags, preempt_count());
6227         if (!event)
6228                 /* Ring buffer disabled, return as if not open for write */
6229                 return -EBADF;
6230
6231         entry = ring_buffer_event_data(event);
6232
6233         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6234         if (len) {
6235                 entry->id = -1;
6236                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6237                 written = -EFAULT;
6238         } else
6239                 written = cnt;
6240
6241         __buffer_unlock_commit(buffer, event);
6242
6243         if (written > 0)
6244                 *fpos += written;
6245
6246         return written;
6247 }
6248
6249 static int tracing_clock_show(struct seq_file *m, void *v)
6250 {
6251         struct trace_array *tr = m->private;
6252         int i;
6253
6254         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6255                 seq_printf(m,
6256                         "%s%s%s%s", i ? " " : "",
6257                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6258                         i == tr->clock_id ? "]" : "");
6259         seq_putc(m, '\n');
6260
6261         return 0;
6262 }
6263
6264 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6265 {
6266         int i;
6267
6268         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6269                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6270                         break;
6271         }
6272         if (i == ARRAY_SIZE(trace_clocks))
6273                 return -EINVAL;
6274
6275         mutex_lock(&trace_types_lock);
6276
6277         tr->clock_id = i;
6278
6279         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6280
6281         /*
6282          * New clock may not be consistent with the previous clock.
6283          * Reset the buffer so that it doesn't have incomparable timestamps.
6284          */
6285         tracing_reset_online_cpus(&tr->trace_buffer);
6286
6287 #ifdef CONFIG_TRACER_MAX_TRACE
6288         if (tr->max_buffer.buffer)
6289                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6290         tracing_reset_online_cpus(&tr->max_buffer);
6291 #endif
6292
6293         mutex_unlock(&trace_types_lock);
6294
6295         return 0;
6296 }
6297
6298 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6299                                    size_t cnt, loff_t *fpos)
6300 {
6301         struct seq_file *m = filp->private_data;
6302         struct trace_array *tr = m->private;
6303         char buf[64];
6304         const char *clockstr;
6305         int ret;
6306
6307         if (cnt >= sizeof(buf))
6308                 return -EINVAL;
6309
6310         if (copy_from_user(buf, ubuf, cnt))
6311                 return -EFAULT;
6312
6313         buf[cnt] = 0;
6314
6315         clockstr = strstrip(buf);
6316
6317         ret = tracing_set_clock(tr, clockstr);
6318         if (ret)
6319                 return ret;
6320
6321         *fpos += cnt;
6322
6323         return cnt;
6324 }
6325
6326 static int tracing_clock_open(struct inode *inode, struct file *file)
6327 {
6328         struct trace_array *tr = inode->i_private;
6329         int ret;
6330
6331         if (tracing_disabled)
6332                 return -ENODEV;
6333
6334         if (trace_array_get(tr))
6335                 return -ENODEV;
6336
6337         ret = single_open(file, tracing_clock_show, inode->i_private);
6338         if (ret < 0)
6339                 trace_array_put(tr);
6340
6341         return ret;
6342 }
6343
6344 struct ftrace_buffer_info {
6345         struct trace_iterator   iter;
6346         void                    *spare;
6347         unsigned int            spare_cpu;
6348         unsigned int            read;
6349 };
6350
6351 #ifdef CONFIG_TRACER_SNAPSHOT
6352 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6353 {
6354         struct trace_array *tr = inode->i_private;
6355         struct trace_iterator *iter;
6356         struct seq_file *m;
6357         int ret = 0;
6358
6359         if (trace_array_get(tr) < 0)
6360                 return -ENODEV;
6361
6362         if (file->f_mode & FMODE_READ) {
6363                 iter = __tracing_open(inode, file, true);
6364                 if (IS_ERR(iter))
6365                         ret = PTR_ERR(iter);
6366         } else {
6367                 /* Writes still need the seq_file to hold the private data */
6368                 ret = -ENOMEM;
6369                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6370                 if (!m)
6371                         goto out;
6372                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6373                 if (!iter) {
6374                         kfree(m);
6375                         goto out;
6376                 }
6377                 ret = 0;
6378
6379                 iter->tr = tr;
6380                 iter->trace_buffer = &tr->max_buffer;
6381                 iter->cpu_file = tracing_get_cpu(inode);
6382                 m->private = iter;
6383                 file->private_data = m;
6384         }
6385 out:
6386         if (ret < 0)
6387                 trace_array_put(tr);
6388
6389         return ret;
6390 }
6391
6392 static ssize_t
6393 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6394                        loff_t *ppos)
6395 {
6396         struct seq_file *m = filp->private_data;
6397         struct trace_iterator *iter = m->private;
6398         struct trace_array *tr = iter->tr;
6399         unsigned long val;
6400         int ret;
6401
6402         ret = tracing_update_buffers();
6403         if (ret < 0)
6404                 return ret;
6405
6406         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6407         if (ret)
6408                 return ret;
6409
6410         mutex_lock(&trace_types_lock);
6411
6412         if (tr->current_trace->use_max_tr) {
6413                 ret = -EBUSY;
6414                 goto out;
6415         }
6416
6417         switch (val) {
6418         case 0:
6419                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6420                         ret = -EINVAL;
6421                         break;
6422                 }
6423                 if (tr->allocated_snapshot)
6424                         free_snapshot(tr);
6425                 break;
6426         case 1:
6427 /* Only allow per-cpu swap if the ring buffer supports it */
6428 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6429                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6430                         ret = -EINVAL;
6431                         break;
6432                 }
6433 #endif
6434                 if (tr->allocated_snapshot)
6435                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6436                                         &tr->trace_buffer, iter->cpu_file);
6437                 else
6438                         ret = tracing_alloc_snapshot_instance(tr);
6439                 if (ret < 0)
6440                         break;
6441                 local_irq_disable();
6442                 /* Now, we're going to swap */
6443                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6444                         update_max_tr(tr, current, smp_processor_id());
6445                 else
6446                         update_max_tr_single(tr, current, iter->cpu_file);
6447                 local_irq_enable();
6448                 break;
6449         default:
6450                 if (tr->allocated_snapshot) {
6451                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6452                                 tracing_reset_online_cpus(&tr->max_buffer);
6453                         else
6454                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6455                 }
6456                 break;
6457         }
6458
6459         if (ret >= 0) {
6460                 *ppos += cnt;
6461                 ret = cnt;
6462         }
6463 out:
6464         mutex_unlock(&trace_types_lock);
6465         return ret;
6466 }
6467
6468 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6469 {
6470         struct seq_file *m = file->private_data;
6471         int ret;
6472
6473         ret = tracing_release(inode, file);
6474
6475         if (file->f_mode & FMODE_READ)
6476                 return ret;
6477
6478         /* If write only, the seq_file is just a stub */
6479         if (m)
6480                 kfree(m->private);
6481         kfree(m);
6482
6483         return 0;
6484 }
6485
6486 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6487 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6488                                     size_t count, loff_t *ppos);
6489 static int tracing_buffers_release(struct inode *inode, struct file *file);
6490 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6491                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6492
6493 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6494 {
6495         struct ftrace_buffer_info *info;
6496         int ret;
6497
6498         ret = tracing_buffers_open(inode, filp);
6499         if (ret < 0)
6500                 return ret;
6501
6502         info = filp->private_data;
6503
6504         if (info->iter.trace->use_max_tr) {
6505                 tracing_buffers_release(inode, filp);
6506                 return -EBUSY;
6507         }
6508
6509         info->iter.snapshot = true;
6510         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6511
6512         return ret;
6513 }
6514
6515 #endif /* CONFIG_TRACER_SNAPSHOT */
6516
6517
6518 static const struct file_operations tracing_thresh_fops = {
6519         .open           = tracing_open_generic,
6520         .read           = tracing_thresh_read,
6521         .write          = tracing_thresh_write,
6522         .llseek         = generic_file_llseek,
6523 };
6524
6525 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6526 static const struct file_operations tracing_max_lat_fops = {
6527         .open           = tracing_open_generic,
6528         .read           = tracing_max_lat_read,
6529         .write          = tracing_max_lat_write,
6530         .llseek         = generic_file_llseek,
6531 };
6532 #endif
6533
6534 static const struct file_operations set_tracer_fops = {
6535         .open           = tracing_open_generic,
6536         .read           = tracing_set_trace_read,
6537         .write          = tracing_set_trace_write,
6538         .llseek         = generic_file_llseek,
6539 };
6540
6541 static const struct file_operations tracing_pipe_fops = {
6542         .open           = tracing_open_pipe,
6543         .poll           = tracing_poll_pipe,
6544         .read           = tracing_read_pipe,
6545         .splice_read    = tracing_splice_read_pipe,
6546         .release        = tracing_release_pipe,
6547         .llseek         = no_llseek,
6548 };
6549
6550 static const struct file_operations tracing_entries_fops = {
6551         .open           = tracing_open_generic_tr,
6552         .read           = tracing_entries_read,
6553         .write          = tracing_entries_write,
6554         .llseek         = generic_file_llseek,
6555         .release        = tracing_release_generic_tr,
6556 };
6557
6558 static const struct file_operations tracing_total_entries_fops = {
6559         .open           = tracing_open_generic_tr,
6560         .read           = tracing_total_entries_read,
6561         .llseek         = generic_file_llseek,
6562         .release        = tracing_release_generic_tr,
6563 };
6564
6565 static const struct file_operations tracing_free_buffer_fops = {
6566         .open           = tracing_open_generic_tr,
6567         .write          = tracing_free_buffer_write,
6568         .release        = tracing_free_buffer_release,
6569 };
6570
6571 static const struct file_operations tracing_mark_fops = {
6572         .open           = tracing_open_generic_tr,
6573         .write          = tracing_mark_write,
6574         .llseek         = generic_file_llseek,
6575         .release        = tracing_release_generic_tr,
6576 };
6577
6578 static const struct file_operations tracing_mark_raw_fops = {
6579         .open           = tracing_open_generic_tr,
6580         .write          = tracing_mark_raw_write,
6581         .llseek         = generic_file_llseek,
6582         .release        = tracing_release_generic_tr,
6583 };
6584
6585 static const struct file_operations trace_clock_fops = {
6586         .open           = tracing_clock_open,
6587         .read           = seq_read,
6588         .llseek         = seq_lseek,
6589         .release        = tracing_single_release_tr,
6590         .write          = tracing_clock_write,
6591 };
6592
6593 #ifdef CONFIG_TRACER_SNAPSHOT
6594 static const struct file_operations snapshot_fops = {
6595         .open           = tracing_snapshot_open,
6596         .read           = seq_read,
6597         .write          = tracing_snapshot_write,
6598         .llseek         = tracing_lseek,
6599         .release        = tracing_snapshot_release,
6600 };
6601
6602 static const struct file_operations snapshot_raw_fops = {
6603         .open           = snapshot_raw_open,
6604         .read           = tracing_buffers_read,
6605         .release        = tracing_buffers_release,
6606         .splice_read    = tracing_buffers_splice_read,
6607         .llseek         = no_llseek,
6608 };
6609
6610 #endif /* CONFIG_TRACER_SNAPSHOT */
6611
6612 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6613 {
6614         struct trace_array *tr = inode->i_private;
6615         struct ftrace_buffer_info *info;
6616         int ret;
6617
6618         if (tracing_disabled)
6619                 return -ENODEV;
6620
6621         if (trace_array_get(tr) < 0)
6622                 return -ENODEV;
6623
6624         info = kzalloc(sizeof(*info), GFP_KERNEL);
6625         if (!info) {
6626                 trace_array_put(tr);
6627                 return -ENOMEM;
6628         }
6629
6630         mutex_lock(&trace_types_lock);
6631
6632         info->iter.tr           = tr;
6633         info->iter.cpu_file     = tracing_get_cpu(inode);
6634         info->iter.trace        = tr->current_trace;
6635         info->iter.trace_buffer = &tr->trace_buffer;
6636         info->spare             = NULL;
6637         /* Force reading ring buffer for first read */
6638         info->read              = (unsigned int)-1;
6639
6640         filp->private_data = info;
6641
6642         tr->current_trace->ref++;
6643
6644         mutex_unlock(&trace_types_lock);
6645
6646         ret = nonseekable_open(inode, filp);
6647         if (ret < 0)
6648                 trace_array_put(tr);
6649
6650         return ret;
6651 }
6652
6653 static unsigned int
6654 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6655 {
6656         struct ftrace_buffer_info *info = filp->private_data;
6657         struct trace_iterator *iter = &info->iter;
6658
6659         return trace_poll(iter, filp, poll_table);
6660 }
6661
6662 static ssize_t
6663 tracing_buffers_read(struct file *filp, char __user *ubuf,
6664                      size_t count, loff_t *ppos)
6665 {
6666         struct ftrace_buffer_info *info = filp->private_data;
6667         struct trace_iterator *iter = &info->iter;
6668         ssize_t ret = 0;
6669         ssize_t size;
6670
6671         if (!count)
6672                 return 0;
6673
6674 #ifdef CONFIG_TRACER_MAX_TRACE
6675         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6676                 return -EBUSY;
6677 #endif
6678
6679         if (!info->spare) {
6680                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6681                                                           iter->cpu_file);
6682                 if (IS_ERR(info->spare)) {
6683                         ret = PTR_ERR(info->spare);
6684                         info->spare = NULL;
6685                 } else {
6686                         info->spare_cpu = iter->cpu_file;
6687                 }
6688         }
6689         if (!info->spare)
6690                 return ret;
6691
6692         /* Do we have previous read data to read? */
6693         if (info->read < PAGE_SIZE)
6694                 goto read;
6695
6696  again:
6697         trace_access_lock(iter->cpu_file);
6698         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6699                                     &info->spare,
6700                                     count,
6701                                     iter->cpu_file, 0);
6702         trace_access_unlock(iter->cpu_file);
6703
6704         if (ret < 0) {
6705                 if (trace_empty(iter)) {
6706                         if ((filp->f_flags & O_NONBLOCK))
6707                                 return -EAGAIN;
6708
6709                         ret = wait_on_pipe(iter, false);
6710                         if (ret)
6711                                 return ret;
6712
6713                         goto again;
6714                 }
6715                 return 0;
6716         }
6717
6718         info->read = 0;
6719  read:
6720         size = PAGE_SIZE - info->read;
6721         if (size > count)
6722                 size = count;
6723
6724         ret = copy_to_user(ubuf, info->spare + info->read, size);
6725         if (ret == size)
6726                 return -EFAULT;
6727
6728         size -= ret;
6729
6730         *ppos += size;
6731         info->read += size;
6732
6733         return size;
6734 }
6735
6736 static int tracing_buffers_release(struct inode *inode, struct file *file)
6737 {
6738         struct ftrace_buffer_info *info = file->private_data;
6739         struct trace_iterator *iter = &info->iter;
6740
6741         mutex_lock(&trace_types_lock);
6742
6743         iter->tr->current_trace->ref--;
6744
6745         __trace_array_put(iter->tr);
6746
6747         if (info->spare)
6748                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6749                                            info->spare_cpu, info->spare);
6750         kfree(info);
6751
6752         mutex_unlock(&trace_types_lock);
6753
6754         return 0;
6755 }
6756
6757 struct buffer_ref {
6758         struct ring_buffer      *buffer;
6759         void                    *page;
6760         int                     cpu;
6761         refcount_t              refcount;
6762 };
6763
6764 static void buffer_ref_release(struct buffer_ref *ref)
6765 {
6766         if (!refcount_dec_and_test(&ref->refcount))
6767                 return;
6768         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6769         kfree(ref);
6770 }
6771
6772 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6773                                     struct pipe_buffer *buf)
6774 {
6775         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6776
6777         buffer_ref_release(ref);
6778         buf->private = 0;
6779 }
6780
6781 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6782                                 struct pipe_buffer *buf)
6783 {
6784         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6785
6786         if (refcount_read(&ref->refcount) > INT_MAX/2)
6787                 return false;
6788
6789         refcount_inc(&ref->refcount);
6790         return true;
6791 }
6792
6793 /* Pipe buffer operations for a buffer. */
6794 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6795         .can_merge              = 0,
6796         .confirm                = generic_pipe_buf_confirm,
6797         .release                = buffer_pipe_buf_release,
6798         .steal                  = generic_pipe_buf_nosteal,
6799         .get                    = buffer_pipe_buf_get,
6800 };
6801
6802 /*
6803  * Callback from splice_to_pipe(), if we need to release some pages
6804  * at the end of the spd in case we error'ed out in filling the pipe.
6805  */
6806 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6807 {
6808         struct buffer_ref *ref =
6809                 (struct buffer_ref *)spd->partial[i].private;
6810
6811         buffer_ref_release(ref);
6812         spd->partial[i].private = 0;
6813 }
6814
6815 static ssize_t
6816 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6817                             struct pipe_inode_info *pipe, size_t len,
6818                             unsigned int flags)
6819 {
6820         struct ftrace_buffer_info *info = file->private_data;
6821         struct trace_iterator *iter = &info->iter;
6822         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6823         struct page *pages_def[PIPE_DEF_BUFFERS];
6824         struct splice_pipe_desc spd = {
6825                 .pages          = pages_def,
6826                 .partial        = partial_def,
6827                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6828                 .ops            = &buffer_pipe_buf_ops,
6829                 .spd_release    = buffer_spd_release,
6830         };
6831         struct buffer_ref *ref;
6832         int entries, i;
6833         ssize_t ret = 0;
6834
6835 #ifdef CONFIG_TRACER_MAX_TRACE
6836         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6837                 return -EBUSY;
6838 #endif
6839
6840         if (*ppos & (PAGE_SIZE - 1))
6841                 return -EINVAL;
6842
6843         if (len & (PAGE_SIZE - 1)) {
6844                 if (len < PAGE_SIZE)
6845                         return -EINVAL;
6846                 len &= PAGE_MASK;
6847         }
6848
6849         if (splice_grow_spd(pipe, &spd))
6850                 return -ENOMEM;
6851
6852  again:
6853         trace_access_lock(iter->cpu_file);
6854         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6855
6856         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6857                 struct page *page;
6858                 int r;
6859
6860                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6861                 if (!ref) {
6862                         ret = -ENOMEM;
6863                         break;
6864                 }
6865
6866                 refcount_set(&ref->refcount, 1);
6867                 ref->buffer = iter->trace_buffer->buffer;
6868                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6869                 if (IS_ERR(ref->page)) {
6870                         ret = PTR_ERR(ref->page);
6871                         ref->page = NULL;
6872                         kfree(ref);
6873                         break;
6874                 }
6875                 ref->cpu = iter->cpu_file;
6876
6877                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6878                                           len, iter->cpu_file, 1);
6879                 if (r < 0) {
6880                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6881                                                    ref->page);
6882                         kfree(ref);
6883                         break;
6884                 }
6885
6886                 page = virt_to_page(ref->page);
6887
6888                 spd.pages[i] = page;
6889                 spd.partial[i].len = PAGE_SIZE;
6890                 spd.partial[i].offset = 0;
6891                 spd.partial[i].private = (unsigned long)ref;
6892                 spd.nr_pages++;
6893                 *ppos += PAGE_SIZE;
6894
6895                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6896         }
6897
6898         trace_access_unlock(iter->cpu_file);
6899         spd.nr_pages = i;
6900
6901         /* did we read anything? */
6902         if (!spd.nr_pages) {
6903                 if (ret)
6904                         goto out;
6905
6906                 ret = -EAGAIN;
6907                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6908                         goto out;
6909
6910                 ret = wait_on_pipe(iter, true);
6911                 if (ret)
6912                         goto out;
6913
6914                 goto again;
6915         }
6916
6917         ret = splice_to_pipe(pipe, &spd);
6918 out:
6919         splice_shrink_spd(&spd);
6920
6921         return ret;
6922 }
6923
6924 static const struct file_operations tracing_buffers_fops = {
6925         .open           = tracing_buffers_open,
6926         .read           = tracing_buffers_read,
6927         .poll           = tracing_buffers_poll,
6928         .release        = tracing_buffers_release,
6929         .splice_read    = tracing_buffers_splice_read,
6930         .llseek         = no_llseek,
6931 };
6932
6933 static ssize_t
6934 tracing_stats_read(struct file *filp, char __user *ubuf,
6935                    size_t count, loff_t *ppos)
6936 {
6937         struct inode *inode = file_inode(filp);
6938         struct trace_array *tr = inode->i_private;
6939         struct trace_buffer *trace_buf = &tr->trace_buffer;
6940         int cpu = tracing_get_cpu(inode);
6941         struct trace_seq *s;
6942         unsigned long cnt;
6943         unsigned long long t;
6944         unsigned long usec_rem;
6945
6946         s = kmalloc(sizeof(*s), GFP_KERNEL);
6947         if (!s)
6948                 return -ENOMEM;
6949
6950         trace_seq_init(s);
6951
6952         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6953         trace_seq_printf(s, "entries: %ld\n", cnt);
6954
6955         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6956         trace_seq_printf(s, "overrun: %ld\n", cnt);
6957
6958         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6959         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6960
6961         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6962         trace_seq_printf(s, "bytes: %ld\n", cnt);
6963
6964         if (trace_clocks[tr->clock_id].in_ns) {
6965                 /* local or global for trace_clock */
6966                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6967                 usec_rem = do_div(t, USEC_PER_SEC);
6968                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6969                                                                 t, usec_rem);
6970
6971                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6972                 usec_rem = do_div(t, USEC_PER_SEC);
6973                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6974         } else {
6975                 /* counter or tsc mode for trace_clock */
6976                 trace_seq_printf(s, "oldest event ts: %llu\n",
6977                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6978
6979                 trace_seq_printf(s, "now ts: %llu\n",
6980                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6981         }
6982
6983         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6984         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6985
6986         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6987         trace_seq_printf(s, "read events: %ld\n", cnt);
6988
6989         count = simple_read_from_buffer(ubuf, count, ppos,
6990                                         s->buffer, trace_seq_used(s));
6991
6992         kfree(s);
6993
6994         return count;
6995 }
6996
6997 static const struct file_operations tracing_stats_fops = {
6998         .open           = tracing_open_generic_tr,
6999         .read           = tracing_stats_read,
7000         .llseek         = generic_file_llseek,
7001         .release        = tracing_release_generic_tr,
7002 };
7003
7004 #ifdef CONFIG_DYNAMIC_FTRACE
7005
7006 static ssize_t
7007 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7008                   size_t cnt, loff_t *ppos)
7009 {
7010         unsigned long *p = filp->private_data;
7011         char buf[64]; /* Not too big for a shallow stack */
7012         int r;
7013
7014         r = scnprintf(buf, 63, "%ld", *p);
7015         buf[r++] = '\n';
7016
7017         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7018 }
7019
7020 static const struct file_operations tracing_dyn_info_fops = {
7021         .open           = tracing_open_generic,
7022         .read           = tracing_read_dyn_info,
7023         .llseek         = generic_file_llseek,
7024 };
7025 #endif /* CONFIG_DYNAMIC_FTRACE */
7026
7027 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7028 static void
7029 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7030                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7031                 void *data)
7032 {
7033         tracing_snapshot_instance(tr);
7034 }
7035
7036 static void
7037 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7038                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7039                       void *data)
7040 {
7041         struct ftrace_func_mapper *mapper = data;
7042         long *count = NULL;
7043
7044         if (mapper)
7045                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7046
7047         if (count) {
7048
7049                 if (*count <= 0)
7050                         return;
7051
7052                 (*count)--;
7053         }
7054
7055         tracing_snapshot_instance(tr);
7056 }
7057
7058 static int
7059 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7060                       struct ftrace_probe_ops *ops, void *data)
7061 {
7062         struct ftrace_func_mapper *mapper = data;
7063         long *count = NULL;
7064
7065         seq_printf(m, "%ps:", (void *)ip);
7066
7067         seq_puts(m, "snapshot");
7068
7069         if (mapper)
7070                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7071
7072         if (count)
7073                 seq_printf(m, ":count=%ld\n", *count);
7074         else
7075                 seq_puts(m, ":unlimited\n");
7076
7077         return 0;
7078 }
7079
7080 static int
7081 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7082                      unsigned long ip, void *init_data, void **data)
7083 {
7084         struct ftrace_func_mapper *mapper = *data;
7085
7086         if (!mapper) {
7087                 mapper = allocate_ftrace_func_mapper();
7088                 if (!mapper)
7089                         return -ENOMEM;
7090                 *data = mapper;
7091         }
7092
7093         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7094 }
7095
7096 static void
7097 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7098                      unsigned long ip, void *data)
7099 {
7100         struct ftrace_func_mapper *mapper = data;
7101
7102         if (!ip) {
7103                 if (!mapper)
7104                         return;
7105                 free_ftrace_func_mapper(mapper, NULL);
7106                 return;
7107         }
7108
7109         ftrace_func_mapper_remove_ip(mapper, ip);
7110 }
7111
7112 static struct ftrace_probe_ops snapshot_probe_ops = {
7113         .func                   = ftrace_snapshot,
7114         .print                  = ftrace_snapshot_print,
7115 };
7116
7117 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7118         .func                   = ftrace_count_snapshot,
7119         .print                  = ftrace_snapshot_print,
7120         .init                   = ftrace_snapshot_init,
7121         .free                   = ftrace_snapshot_free,
7122 };
7123
7124 static int
7125 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7126                                char *glob, char *cmd, char *param, int enable)
7127 {
7128         struct ftrace_probe_ops *ops;
7129         void *count = (void *)-1;
7130         char *number;
7131         int ret;
7132
7133         if (!tr)
7134                 return -ENODEV;
7135
7136         /* hash funcs only work with set_ftrace_filter */
7137         if (!enable)
7138                 return -EINVAL;
7139
7140         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7141
7142         if (glob[0] == '!')
7143                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7144
7145         if (!param)
7146                 goto out_reg;
7147
7148         number = strsep(&param, ":");
7149
7150         if (!strlen(number))
7151                 goto out_reg;
7152
7153         /*
7154          * We use the callback data field (which is a pointer)
7155          * as our counter.
7156          */
7157         ret = kstrtoul(number, 0, (unsigned long *)&count);
7158         if (ret)
7159                 return ret;
7160
7161  out_reg:
7162         ret = tracing_alloc_snapshot_instance(tr);
7163         if (ret < 0)
7164                 goto out;
7165
7166         ret = register_ftrace_function_probe(glob, tr, ops, count);
7167
7168  out:
7169         return ret < 0 ? ret : 0;
7170 }
7171
7172 static struct ftrace_func_command ftrace_snapshot_cmd = {
7173         .name                   = "snapshot",
7174         .func                   = ftrace_trace_snapshot_callback,
7175 };
7176
7177 static __init int register_snapshot_cmd(void)
7178 {
7179         return register_ftrace_command(&ftrace_snapshot_cmd);
7180 }
7181 #else
7182 static inline __init int register_snapshot_cmd(void) { return 0; }
7183 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7184
7185 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7186 {
7187         if (WARN_ON(!tr->dir))
7188                 return ERR_PTR(-ENODEV);
7189
7190         /* Top directory uses NULL as the parent */
7191         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7192                 return NULL;
7193
7194         /* All sub buffers have a descriptor */
7195         return tr->dir;
7196 }
7197
7198 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7199 {
7200         struct dentry *d_tracer;
7201
7202         if (tr->percpu_dir)
7203                 return tr->percpu_dir;
7204
7205         d_tracer = tracing_get_dentry(tr);
7206         if (IS_ERR(d_tracer))
7207                 return NULL;
7208
7209         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7210
7211         WARN_ONCE(!tr->percpu_dir,
7212                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7213
7214         return tr->percpu_dir;
7215 }
7216
7217 static struct dentry *
7218 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7219                       void *data, long cpu, const struct file_operations *fops)
7220 {
7221         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7222
7223         if (ret) /* See tracing_get_cpu() */
7224                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7225         return ret;
7226 }
7227
7228 static void
7229 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7230 {
7231         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7232         struct dentry *d_cpu;
7233         char cpu_dir[30]; /* 30 characters should be more than enough */
7234
7235         if (!d_percpu)
7236                 return;
7237
7238         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7239         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7240         if (!d_cpu) {
7241                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7242                 return;
7243         }
7244
7245         /* per cpu trace_pipe */
7246         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7247                                 tr, cpu, &tracing_pipe_fops);
7248
7249         /* per cpu trace */
7250         trace_create_cpu_file("trace", 0644, d_cpu,
7251                                 tr, cpu, &tracing_fops);
7252
7253         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7254                                 tr, cpu, &tracing_buffers_fops);
7255
7256         trace_create_cpu_file("stats", 0444, d_cpu,
7257                                 tr, cpu, &tracing_stats_fops);
7258
7259         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7260                                 tr, cpu, &tracing_entries_fops);
7261
7262 #ifdef CONFIG_TRACER_SNAPSHOT
7263         trace_create_cpu_file("snapshot", 0644, d_cpu,
7264                                 tr, cpu, &snapshot_fops);
7265
7266         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7267                                 tr, cpu, &snapshot_raw_fops);
7268 #endif
7269 }
7270
7271 #ifdef CONFIG_FTRACE_SELFTEST
7272 /* Let selftest have access to static functions in this file */
7273 #include "trace_selftest.c"
7274 #endif
7275
7276 static ssize_t
7277 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7278                         loff_t *ppos)
7279 {
7280         struct trace_option_dentry *topt = filp->private_data;
7281         char *buf;
7282
7283         if (topt->flags->val & topt->opt->bit)
7284                 buf = "1\n";
7285         else
7286                 buf = "0\n";
7287
7288         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7289 }
7290
7291 static ssize_t
7292 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7293                          loff_t *ppos)
7294 {
7295         struct trace_option_dentry *topt = filp->private_data;
7296         unsigned long val;
7297         int ret;
7298
7299         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7300         if (ret)
7301                 return ret;
7302
7303         if (val != 0 && val != 1)
7304                 return -EINVAL;
7305
7306         if (!!(topt->flags->val & topt->opt->bit) != val) {
7307                 mutex_lock(&trace_types_lock);
7308                 ret = __set_tracer_option(topt->tr, topt->flags,
7309                                           topt->opt, !val);
7310                 mutex_unlock(&trace_types_lock);
7311                 if (ret)
7312                         return ret;
7313         }
7314
7315         *ppos += cnt;
7316
7317         return cnt;
7318 }
7319
7320
7321 static const struct file_operations trace_options_fops = {
7322         .open = tracing_open_generic,
7323         .read = trace_options_read,
7324         .write = trace_options_write,
7325         .llseek = generic_file_llseek,
7326 };
7327
7328 /*
7329  * In order to pass in both the trace_array descriptor as well as the index
7330  * to the flag that the trace option file represents, the trace_array
7331  * has a character array of trace_flags_index[], which holds the index
7332  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7333  * The address of this character array is passed to the flag option file
7334  * read/write callbacks.
7335  *
7336  * In order to extract both the index and the trace_array descriptor,
7337  * get_tr_index() uses the following algorithm.
7338  *
7339  *   idx = *ptr;
7340  *
7341  * As the pointer itself contains the address of the index (remember
7342  * index[1] == 1).
7343  *
7344  * Then to get the trace_array descriptor, by subtracting that index
7345  * from the ptr, we get to the start of the index itself.
7346  *
7347  *   ptr - idx == &index[0]
7348  *
7349  * Then a simple container_of() from that pointer gets us to the
7350  * trace_array descriptor.
7351  */
7352 static void get_tr_index(void *data, struct trace_array **ptr,
7353                          unsigned int *pindex)
7354 {
7355         *pindex = *(unsigned char *)data;
7356
7357         *ptr = container_of(data - *pindex, struct trace_array,
7358                             trace_flags_index);
7359 }
7360
7361 static ssize_t
7362 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7363                         loff_t *ppos)
7364 {
7365         void *tr_index = filp->private_data;
7366         struct trace_array *tr;
7367         unsigned int index;
7368         char *buf;
7369
7370         get_tr_index(tr_index, &tr, &index);
7371
7372         if (tr->trace_flags & (1 << index))
7373                 buf = "1\n";
7374         else
7375                 buf = "0\n";
7376
7377         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7378 }
7379
7380 static ssize_t
7381 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7382                          loff_t *ppos)
7383 {
7384         void *tr_index = filp->private_data;
7385         struct trace_array *tr;
7386         unsigned int index;
7387         unsigned long val;
7388         int ret;
7389
7390         get_tr_index(tr_index, &tr, &index);
7391
7392         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7393         if (ret)
7394                 return ret;
7395
7396         if (val != 0 && val != 1)
7397                 return -EINVAL;
7398
7399         mutex_lock(&event_mutex);
7400         mutex_lock(&trace_types_lock);
7401         ret = set_tracer_flag(tr, 1 << index, val);
7402         mutex_unlock(&trace_types_lock);
7403         mutex_unlock(&event_mutex);
7404
7405         if (ret < 0)
7406                 return ret;
7407
7408         *ppos += cnt;
7409
7410         return cnt;
7411 }
7412
7413 static const struct file_operations trace_options_core_fops = {
7414         .open = tracing_open_generic,
7415         .read = trace_options_core_read,
7416         .write = trace_options_core_write,
7417         .llseek = generic_file_llseek,
7418 };
7419
7420 struct dentry *trace_create_file(const char *name,
7421                                  umode_t mode,
7422                                  struct dentry *parent,
7423                                  void *data,
7424                                  const struct file_operations *fops)
7425 {
7426         struct dentry *ret;
7427
7428         ret = tracefs_create_file(name, mode, parent, data, fops);
7429         if (!ret)
7430                 pr_warn("Could not create tracefs '%s' entry\n", name);
7431
7432         return ret;
7433 }
7434
7435
7436 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7437 {
7438         struct dentry *d_tracer;
7439
7440         if (tr->options)
7441                 return tr->options;
7442
7443         d_tracer = tracing_get_dentry(tr);
7444         if (IS_ERR(d_tracer))
7445                 return NULL;
7446
7447         tr->options = tracefs_create_dir("options", d_tracer);
7448         if (!tr->options) {
7449                 pr_warn("Could not create tracefs directory 'options'\n");
7450                 return NULL;
7451         }
7452
7453         return tr->options;
7454 }
7455
7456 static void
7457 create_trace_option_file(struct trace_array *tr,
7458                          struct trace_option_dentry *topt,
7459                          struct tracer_flags *flags,
7460                          struct tracer_opt *opt)
7461 {
7462         struct dentry *t_options;
7463
7464         t_options = trace_options_init_dentry(tr);
7465         if (!t_options)
7466                 return;
7467
7468         topt->flags = flags;
7469         topt->opt = opt;
7470         topt->tr = tr;
7471
7472         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7473                                     &trace_options_fops);
7474
7475 }
7476
7477 static void
7478 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7479 {
7480         struct trace_option_dentry *topts;
7481         struct trace_options *tr_topts;
7482         struct tracer_flags *flags;
7483         struct tracer_opt *opts;
7484         int cnt;
7485         int i;
7486
7487         if (!tracer)
7488                 return;
7489
7490         flags = tracer->flags;
7491
7492         if (!flags || !flags->opts)
7493                 return;
7494
7495         /*
7496          * If this is an instance, only create flags for tracers
7497          * the instance may have.
7498          */
7499         if (!trace_ok_for_array(tracer, tr))
7500                 return;
7501
7502         for (i = 0; i < tr->nr_topts; i++) {
7503                 /* Make sure there's no duplicate flags. */
7504                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7505                         return;
7506         }
7507
7508         opts = flags->opts;
7509
7510         for (cnt = 0; opts[cnt].name; cnt++)
7511                 ;
7512
7513         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7514         if (!topts)
7515                 return;
7516
7517         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7518                             GFP_KERNEL);
7519         if (!tr_topts) {
7520                 kfree(topts);
7521                 return;
7522         }
7523
7524         tr->topts = tr_topts;
7525         tr->topts[tr->nr_topts].tracer = tracer;
7526         tr->topts[tr->nr_topts].topts = topts;
7527         tr->nr_topts++;
7528
7529         for (cnt = 0; opts[cnt].name; cnt++) {
7530                 create_trace_option_file(tr, &topts[cnt], flags,
7531                                          &opts[cnt]);
7532                 WARN_ONCE(topts[cnt].entry == NULL,
7533                           "Failed to create trace option: %s",
7534                           opts[cnt].name);
7535         }
7536 }
7537
7538 static struct dentry *
7539 create_trace_option_core_file(struct trace_array *tr,
7540                               const char *option, long index)
7541 {
7542         struct dentry *t_options;
7543
7544         t_options = trace_options_init_dentry(tr);
7545         if (!t_options)
7546                 return NULL;
7547
7548         return trace_create_file(option, 0644, t_options,
7549                                  (void *)&tr->trace_flags_index[index],
7550                                  &trace_options_core_fops);
7551 }
7552
7553 static void create_trace_options_dir(struct trace_array *tr)
7554 {
7555         struct dentry *t_options;
7556         bool top_level = tr == &global_trace;
7557         int i;
7558
7559         t_options = trace_options_init_dentry(tr);
7560         if (!t_options)
7561                 return;
7562
7563         for (i = 0; trace_options[i]; i++) {
7564                 if (top_level ||
7565                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7566                         create_trace_option_core_file(tr, trace_options[i], i);
7567         }
7568 }
7569
7570 static ssize_t
7571 rb_simple_read(struct file *filp, char __user *ubuf,
7572                size_t cnt, loff_t *ppos)
7573 {
7574         struct trace_array *tr = filp->private_data;
7575         char buf[64];
7576         int r;
7577
7578         r = tracer_tracing_is_on(tr);
7579         r = sprintf(buf, "%d\n", r);
7580
7581         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7582 }
7583
7584 static ssize_t
7585 rb_simple_write(struct file *filp, const char __user *ubuf,
7586                 size_t cnt, loff_t *ppos)
7587 {
7588         struct trace_array *tr = filp->private_data;
7589         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7590         unsigned long val;
7591         int ret;
7592
7593         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7594         if (ret)
7595                 return ret;
7596
7597         if (buffer) {
7598                 mutex_lock(&trace_types_lock);
7599                 if (!!val == tracer_tracing_is_on(tr)) {
7600                         val = 0; /* do nothing */
7601                 } else if (val) {
7602                         tracer_tracing_on(tr);
7603                         if (tr->current_trace->start)
7604                                 tr->current_trace->start(tr);
7605                 } else {
7606                         tracer_tracing_off(tr);
7607                         if (tr->current_trace->stop)
7608                                 tr->current_trace->stop(tr);
7609                 }
7610                 mutex_unlock(&trace_types_lock);
7611         }
7612
7613         (*ppos)++;
7614
7615         return cnt;
7616 }
7617
7618 static const struct file_operations rb_simple_fops = {
7619         .open           = tracing_open_generic_tr,
7620         .read           = rb_simple_read,
7621         .write          = rb_simple_write,
7622         .release        = tracing_release_generic_tr,
7623         .llseek         = default_llseek,
7624 };
7625
7626 struct dentry *trace_instance_dir;
7627
7628 static void
7629 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7630
7631 static int
7632 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7633 {
7634         enum ring_buffer_flags rb_flags;
7635
7636         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7637
7638         buf->tr = tr;
7639
7640         buf->buffer = ring_buffer_alloc(size, rb_flags);
7641         if (!buf->buffer)
7642                 return -ENOMEM;
7643
7644         buf->data = alloc_percpu(struct trace_array_cpu);
7645         if (!buf->data) {
7646                 ring_buffer_free(buf->buffer);
7647                 buf->buffer = NULL;
7648                 return -ENOMEM;
7649         }
7650
7651         /* Allocate the first page for all buffers */
7652         set_buffer_entries(&tr->trace_buffer,
7653                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7654
7655         return 0;
7656 }
7657
7658 static int allocate_trace_buffers(struct trace_array *tr, int size)
7659 {
7660         int ret;
7661
7662         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7663         if (ret)
7664                 return ret;
7665
7666 #ifdef CONFIG_TRACER_MAX_TRACE
7667         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7668                                     allocate_snapshot ? size : 1);
7669         if (WARN_ON(ret)) {
7670                 ring_buffer_free(tr->trace_buffer.buffer);
7671                 tr->trace_buffer.buffer = NULL;
7672                 free_percpu(tr->trace_buffer.data);
7673                 tr->trace_buffer.data = NULL;
7674                 return -ENOMEM;
7675         }
7676         tr->allocated_snapshot = allocate_snapshot;
7677
7678         /*
7679          * Only the top level trace array gets its snapshot allocated
7680          * from the kernel command line.
7681          */
7682         allocate_snapshot = false;
7683 #endif
7684
7685         /*
7686          * Because of some magic with the way alloc_percpu() works on
7687          * x86_64, we need to synchronize the pgd of all the tables,
7688          * otherwise the trace events that happen in x86_64 page fault
7689          * handlers can't cope with accessing the chance that a
7690          * alloc_percpu()'d memory might be touched in the page fault trace
7691          * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
7692          * calls in tracing, because something might get triggered within a
7693          * page fault trace event!
7694          */
7695         vmalloc_sync_mappings();
7696
7697         return 0;
7698 }
7699
7700 static void free_trace_buffer(struct trace_buffer *buf)
7701 {
7702         if (buf->buffer) {
7703                 ring_buffer_free(buf->buffer);
7704                 buf->buffer = NULL;
7705                 free_percpu(buf->data);
7706                 buf->data = NULL;
7707         }
7708 }
7709
7710 static void free_trace_buffers(struct trace_array *tr)
7711 {
7712         if (!tr)
7713                 return;
7714
7715         free_trace_buffer(&tr->trace_buffer);
7716
7717 #ifdef CONFIG_TRACER_MAX_TRACE
7718         free_trace_buffer(&tr->max_buffer);
7719 #endif
7720 }
7721
7722 static void init_trace_flags_index(struct trace_array *tr)
7723 {
7724         int i;
7725
7726         /* Used by the trace options files */
7727         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7728                 tr->trace_flags_index[i] = i;
7729 }
7730
7731 static void __update_tracer_options(struct trace_array *tr)
7732 {
7733         struct tracer *t;
7734
7735         for (t = trace_types; t; t = t->next)
7736                 add_tracer_options(tr, t);
7737 }
7738
7739 static void update_tracer_options(struct trace_array *tr)
7740 {
7741         mutex_lock(&trace_types_lock);
7742         tracer_options_updated = true;
7743         __update_tracer_options(tr);
7744         mutex_unlock(&trace_types_lock);
7745 }
7746
7747 static int instance_mkdir(const char *name)
7748 {
7749         struct trace_array *tr;
7750         int ret;
7751
7752         mutex_lock(&event_mutex);
7753         mutex_lock(&trace_types_lock);
7754
7755         ret = -EEXIST;
7756         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7757                 if (tr->name && strcmp(tr->name, name) == 0)
7758                         goto out_unlock;
7759         }
7760
7761         ret = -ENOMEM;
7762         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7763         if (!tr)
7764                 goto out_unlock;
7765
7766         tr->name = kstrdup(name, GFP_KERNEL);
7767         if (!tr->name)
7768                 goto out_free_tr;
7769
7770         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7771                 goto out_free_tr;
7772
7773         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7774
7775         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7776
7777         raw_spin_lock_init(&tr->start_lock);
7778
7779         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7780
7781         tr->current_trace = &nop_trace;
7782
7783         INIT_LIST_HEAD(&tr->systems);
7784         INIT_LIST_HEAD(&tr->events);
7785
7786         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7787                 goto out_free_tr;
7788
7789         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7790         if (!tr->dir)
7791                 goto out_free_tr;
7792
7793         ret = event_trace_add_tracer(tr->dir, tr);
7794         if (ret) {
7795                 tracefs_remove_recursive(tr->dir);
7796                 goto out_free_tr;
7797         }
7798
7799         ftrace_init_trace_array(tr);
7800
7801         init_tracer_tracefs(tr, tr->dir);
7802         init_trace_flags_index(tr);
7803         __update_tracer_options(tr);
7804
7805         list_add(&tr->list, &ftrace_trace_arrays);
7806
7807         mutex_unlock(&trace_types_lock);
7808         mutex_unlock(&event_mutex);
7809
7810         return 0;
7811
7812  out_free_tr:
7813         free_trace_buffers(tr);
7814         free_cpumask_var(tr->tracing_cpumask);
7815         kfree(tr->name);
7816         kfree(tr);
7817
7818  out_unlock:
7819         mutex_unlock(&trace_types_lock);
7820         mutex_unlock(&event_mutex);
7821
7822         return ret;
7823
7824 }
7825
7826 static int instance_rmdir(const char *name)
7827 {
7828         struct trace_array *tr;
7829         int found = 0;
7830         int ret;
7831         int i;
7832
7833         mutex_lock(&event_mutex);
7834         mutex_lock(&trace_types_lock);
7835
7836         ret = -ENODEV;
7837         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7838                 if (tr->name && strcmp(tr->name, name) == 0) {
7839                         found = 1;
7840                         break;
7841                 }
7842         }
7843         if (!found)
7844                 goto out_unlock;
7845
7846         ret = -EBUSY;
7847         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7848                 goto out_unlock;
7849
7850         list_del(&tr->list);
7851
7852         /* Disable all the flags that were enabled coming in */
7853         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7854                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7855                         set_tracer_flag(tr, 1 << i, 0);
7856         }
7857
7858         tracing_set_nop(tr);
7859         clear_ftrace_function_probes(tr);
7860         event_trace_del_tracer(tr);
7861         ftrace_clear_pids(tr);
7862         ftrace_destroy_function_files(tr);
7863         tracefs_remove_recursive(tr->dir);
7864         free_trace_buffers(tr);
7865
7866         for (i = 0; i < tr->nr_topts; i++) {
7867                 kfree(tr->topts[i].topts);
7868         }
7869         kfree(tr->topts);
7870
7871         free_cpumask_var(tr->tracing_cpumask);
7872         kfree(tr->name);
7873         kfree(tr);
7874
7875         ret = 0;
7876
7877  out_unlock:
7878         mutex_unlock(&trace_types_lock);
7879         mutex_unlock(&event_mutex);
7880
7881         return ret;
7882 }
7883
7884 static __init void create_trace_instances(struct dentry *d_tracer)
7885 {
7886         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7887                                                          instance_mkdir,
7888                                                          instance_rmdir);
7889         if (WARN_ON(!trace_instance_dir))
7890                 return;
7891 }
7892
7893 static void
7894 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7895 {
7896         int cpu;
7897
7898         trace_create_file("available_tracers", 0444, d_tracer,
7899                         tr, &show_traces_fops);
7900
7901         trace_create_file("current_tracer", 0644, d_tracer,
7902                         tr, &set_tracer_fops);
7903
7904         trace_create_file("tracing_cpumask", 0644, d_tracer,
7905                           tr, &tracing_cpumask_fops);
7906
7907         trace_create_file("trace_options", 0644, d_tracer,
7908                           tr, &tracing_iter_fops);
7909
7910         trace_create_file("trace", 0644, d_tracer,
7911                           tr, &tracing_fops);
7912
7913         trace_create_file("trace_pipe", 0444, d_tracer,
7914                           tr, &tracing_pipe_fops);
7915
7916         trace_create_file("buffer_size_kb", 0644, d_tracer,
7917                           tr, &tracing_entries_fops);
7918
7919         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7920                           tr, &tracing_total_entries_fops);
7921
7922         trace_create_file("free_buffer", 0200, d_tracer,
7923                           tr, &tracing_free_buffer_fops);
7924
7925         trace_create_file("trace_marker", 0220, d_tracer,
7926                           tr, &tracing_mark_fops);
7927
7928         trace_create_file("trace_marker_raw", 0220, d_tracer,
7929                           tr, &tracing_mark_raw_fops);
7930
7931         trace_create_file("trace_clock", 0644, d_tracer, tr,
7932                           &trace_clock_fops);
7933
7934         trace_create_file("tracing_on", 0644, d_tracer,
7935                           tr, &rb_simple_fops);
7936
7937         create_trace_options_dir(tr);
7938
7939 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7940         trace_create_file("tracing_max_latency", 0644, d_tracer,
7941                         &tr->max_latency, &tracing_max_lat_fops);
7942 #endif
7943
7944         if (ftrace_create_function_files(tr, d_tracer))
7945                 WARN(1, "Could not allocate function filter files");
7946
7947 #ifdef CONFIG_TRACER_SNAPSHOT
7948         trace_create_file("snapshot", 0644, d_tracer,
7949                           tr, &snapshot_fops);
7950 #endif
7951
7952         for_each_tracing_cpu(cpu)
7953                 tracing_init_tracefs_percpu(tr, cpu);
7954
7955         ftrace_init_tracefs(tr, d_tracer);
7956 }
7957
7958 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7959 {
7960         struct vfsmount *mnt;
7961         struct file_system_type *type;
7962
7963         /*
7964          * To maintain backward compatibility for tools that mount
7965          * debugfs to get to the tracing facility, tracefs is automatically
7966          * mounted to the debugfs/tracing directory.
7967          */
7968         type = get_fs_type("tracefs");
7969         if (!type)
7970                 return NULL;
7971         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7972         put_filesystem(type);
7973         if (IS_ERR(mnt))
7974                 return NULL;
7975         mntget(mnt);
7976
7977         return mnt;
7978 }
7979
7980 /**
7981  * tracing_init_dentry - initialize top level trace array
7982  *
7983  * This is called when creating files or directories in the tracing
7984  * directory. It is called via fs_initcall() by any of the boot up code
7985  * and expects to return the dentry of the top level tracing directory.
7986  */
7987 struct dentry *tracing_init_dentry(void)
7988 {
7989         struct trace_array *tr = &global_trace;
7990
7991         /* The top level trace array uses  NULL as parent */
7992         if (tr->dir)
7993                 return NULL;
7994
7995         if (WARN_ON(!tracefs_initialized()) ||
7996                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7997                  WARN_ON(!debugfs_initialized())))
7998                 return ERR_PTR(-ENODEV);
7999
8000         /*
8001          * As there may still be users that expect the tracing
8002          * files to exist in debugfs/tracing, we must automount
8003          * the tracefs file system there, so older tools still
8004          * work with the newer kerenl.
8005          */
8006         tr->dir = debugfs_create_automount("tracing", NULL,
8007                                            trace_automount, NULL);
8008         if (!tr->dir) {
8009                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8010                 return ERR_PTR(-ENOMEM);
8011         }
8012
8013         return NULL;
8014 }
8015
8016 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8017 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8018
8019 static void __init trace_eval_init(void)
8020 {
8021         int len;
8022
8023         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8024         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8025 }
8026
8027 #ifdef CONFIG_MODULES
8028 static void trace_module_add_evals(struct module *mod)
8029 {
8030         if (!mod->num_trace_evals)
8031                 return;
8032
8033         /*
8034          * Modules with bad taint do not have events created, do
8035          * not bother with enums either.
8036          */
8037         if (trace_module_has_bad_taint(mod))
8038                 return;
8039
8040         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8041 }
8042
8043 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8044 static void trace_module_remove_evals(struct module *mod)
8045 {
8046         union trace_eval_map_item *map;
8047         union trace_eval_map_item **last = &trace_eval_maps;
8048
8049         if (!mod->num_trace_evals)
8050                 return;
8051
8052         mutex_lock(&trace_eval_mutex);
8053
8054         map = trace_eval_maps;
8055
8056         while (map) {
8057                 if (map->head.mod == mod)
8058                         break;
8059                 map = trace_eval_jmp_to_tail(map);
8060                 last = &map->tail.next;
8061                 map = map->tail.next;
8062         }
8063         if (!map)
8064                 goto out;
8065
8066         *last = trace_eval_jmp_to_tail(map)->tail.next;
8067         kfree(map);
8068  out:
8069         mutex_unlock(&trace_eval_mutex);
8070 }
8071 #else
8072 static inline void trace_module_remove_evals(struct module *mod) { }
8073 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8074
8075 static int trace_module_notify(struct notifier_block *self,
8076                                unsigned long val, void *data)
8077 {
8078         struct module *mod = data;
8079
8080         switch (val) {
8081         case MODULE_STATE_COMING:
8082                 trace_module_add_evals(mod);
8083                 break;
8084         case MODULE_STATE_GOING:
8085                 trace_module_remove_evals(mod);
8086                 break;
8087         }
8088
8089         return 0;
8090 }
8091
8092 static struct notifier_block trace_module_nb = {
8093         .notifier_call = trace_module_notify,
8094         .priority = 0,
8095 };
8096 #endif /* CONFIG_MODULES */
8097
8098 static __init int tracer_init_tracefs(void)
8099 {
8100         struct dentry *d_tracer;
8101
8102         trace_access_lock_init();
8103
8104         d_tracer = tracing_init_dentry();
8105         if (IS_ERR(d_tracer))
8106                 return 0;
8107
8108         init_tracer_tracefs(&global_trace, d_tracer);
8109         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8110
8111         trace_create_file("tracing_thresh", 0644, d_tracer,
8112                         &global_trace, &tracing_thresh_fops);
8113
8114         trace_create_file("README", 0444, d_tracer,
8115                         NULL, &tracing_readme_fops);
8116
8117         trace_create_file("saved_cmdlines", 0444, d_tracer,
8118                         NULL, &tracing_saved_cmdlines_fops);
8119
8120         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8121                           NULL, &tracing_saved_cmdlines_size_fops);
8122
8123         trace_create_file("saved_tgids", 0444, d_tracer,
8124                         NULL, &tracing_saved_tgids_fops);
8125
8126         trace_eval_init();
8127
8128         trace_create_eval_file(d_tracer);
8129
8130 #ifdef CONFIG_MODULES
8131         register_module_notifier(&trace_module_nb);
8132 #endif
8133
8134 #ifdef CONFIG_DYNAMIC_FTRACE
8135         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8136                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8137 #endif
8138
8139         create_trace_instances(d_tracer);
8140
8141         update_tracer_options(&global_trace);
8142
8143         return 0;
8144 }
8145
8146 static int trace_panic_handler(struct notifier_block *this,
8147                                unsigned long event, void *unused)
8148 {
8149         if (ftrace_dump_on_oops)
8150                 ftrace_dump(ftrace_dump_on_oops);
8151         return NOTIFY_OK;
8152 }
8153
8154 static struct notifier_block trace_panic_notifier = {
8155         .notifier_call  = trace_panic_handler,
8156         .next           = NULL,
8157         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8158 };
8159
8160 static int trace_die_handler(struct notifier_block *self,
8161                              unsigned long val,
8162                              void *data)
8163 {
8164         switch (val) {
8165         case DIE_OOPS:
8166                 if (ftrace_dump_on_oops)
8167                         ftrace_dump(ftrace_dump_on_oops);
8168                 break;
8169         default:
8170                 break;
8171         }
8172         return NOTIFY_OK;
8173 }
8174
8175 static struct notifier_block trace_die_notifier = {
8176         .notifier_call = trace_die_handler,
8177         .priority = 200
8178 };
8179
8180 /*
8181  * printk is set to max of 1024, we really don't need it that big.
8182  * Nothing should be printing 1000 characters anyway.
8183  */
8184 #define TRACE_MAX_PRINT         1000
8185
8186 /*
8187  * Define here KERN_TRACE so that we have one place to modify
8188  * it if we decide to change what log level the ftrace dump
8189  * should be at.
8190  */
8191 #define KERN_TRACE              KERN_EMERG
8192
8193 void
8194 trace_printk_seq(struct trace_seq *s)
8195 {
8196         /* Probably should print a warning here. */
8197         if (s->seq.len >= TRACE_MAX_PRINT)
8198                 s->seq.len = TRACE_MAX_PRINT;
8199
8200         /*
8201          * More paranoid code. Although the buffer size is set to
8202          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8203          * an extra layer of protection.
8204          */
8205         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8206                 s->seq.len = s->seq.size - 1;
8207
8208         /* should be zero ended, but we are paranoid. */
8209         s->buffer[s->seq.len] = 0;
8210
8211         printk(KERN_TRACE "%s", s->buffer);
8212
8213         trace_seq_init(s);
8214 }
8215
8216 void trace_init_global_iter(struct trace_iterator *iter)
8217 {
8218         iter->tr = &global_trace;
8219         iter->trace = iter->tr->current_trace;
8220         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8221         iter->trace_buffer = &global_trace.trace_buffer;
8222
8223         if (iter->trace && iter->trace->open)
8224                 iter->trace->open(iter);
8225
8226         /* Annotate start of buffers if we had overruns */
8227         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8228                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8229
8230         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8231         if (trace_clocks[iter->tr->clock_id].in_ns)
8232                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8233 }
8234
8235 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8236 {
8237         /* use static because iter can be a bit big for the stack */
8238         static struct trace_iterator iter;
8239         static atomic_t dump_running;
8240         struct trace_array *tr = &global_trace;
8241         unsigned int old_userobj;
8242         unsigned long flags;
8243         int cnt = 0, cpu;
8244
8245         /* Only allow one dump user at a time. */
8246         if (atomic_inc_return(&dump_running) != 1) {
8247                 atomic_dec(&dump_running);
8248                 return;
8249         }
8250
8251         /*
8252          * Always turn off tracing when we dump.
8253          * We don't need to show trace output of what happens
8254          * between multiple crashes.
8255          *
8256          * If the user does a sysrq-z, then they can re-enable
8257          * tracing with echo 1 > tracing_on.
8258          */
8259         tracing_off();
8260
8261         local_irq_save(flags);
8262         printk_nmi_direct_enter();
8263
8264         /* Simulate the iterator */
8265         trace_init_global_iter(&iter);
8266
8267         for_each_tracing_cpu(cpu) {
8268                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8269         }
8270
8271         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8272
8273         /* don't look at user memory in panic mode */
8274         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8275
8276         switch (oops_dump_mode) {
8277         case DUMP_ALL:
8278                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8279                 break;
8280         case DUMP_ORIG:
8281                 iter.cpu_file = raw_smp_processor_id();
8282                 break;
8283         case DUMP_NONE:
8284                 goto out_enable;
8285         default:
8286                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8287                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8288         }
8289
8290         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8291
8292         /* Did function tracer already get disabled? */
8293         if (ftrace_is_dead()) {
8294                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8295                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8296         }
8297
8298         /*
8299          * We need to stop all tracing on all CPUS to read the
8300          * the next buffer. This is a bit expensive, but is
8301          * not done often. We fill all what we can read,
8302          * and then release the locks again.
8303          */
8304
8305         while (!trace_empty(&iter)) {
8306
8307                 if (!cnt)
8308                         printk(KERN_TRACE "---------------------------------\n");
8309
8310                 cnt++;
8311
8312                 trace_iterator_reset(&iter);
8313                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8314
8315                 if (trace_find_next_entry_inc(&iter) != NULL) {
8316                         int ret;
8317
8318                         ret = print_trace_line(&iter);
8319                         if (ret != TRACE_TYPE_NO_CONSUME)
8320                                 trace_consume(&iter);
8321                 }
8322                 touch_nmi_watchdog();
8323
8324                 trace_printk_seq(&iter.seq);
8325         }
8326
8327         if (!cnt)
8328                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8329         else
8330                 printk(KERN_TRACE "---------------------------------\n");
8331
8332  out_enable:
8333         tr->trace_flags |= old_userobj;
8334
8335         for_each_tracing_cpu(cpu) {
8336                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8337         }
8338         atomic_dec(&dump_running);
8339         printk_nmi_direct_exit();
8340         local_irq_restore(flags);
8341 }
8342 EXPORT_SYMBOL_GPL(ftrace_dump);
8343
8344 __init static int tracer_alloc_buffers(void)
8345 {
8346         int ring_buf_size;
8347         int ret = -ENOMEM;
8348
8349         /*
8350          * Make sure we don't accidently add more trace options
8351          * than we have bits for.
8352          */
8353         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8354
8355         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8356                 goto out;
8357
8358         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8359                 goto out_free_buffer_mask;
8360
8361         /* Only allocate trace_printk buffers if a trace_printk exists */
8362         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8363                 /* Must be called before global_trace.buffer is allocated */
8364                 trace_printk_init_buffers();
8365
8366         /* To save memory, keep the ring buffer size to its minimum */
8367         if (ring_buffer_expanded)
8368                 ring_buf_size = trace_buf_size;
8369         else
8370                 ring_buf_size = 1;
8371
8372         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8373         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8374
8375         raw_spin_lock_init(&global_trace.start_lock);
8376
8377         /*
8378          * The prepare callbacks allocates some memory for the ring buffer. We
8379          * don't free the buffer if the if the CPU goes down. If we were to free
8380          * the buffer, then the user would lose any trace that was in the
8381          * buffer. The memory will be removed once the "instance" is removed.
8382          */
8383         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8384                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8385                                       NULL);
8386         if (ret < 0)
8387                 goto out_free_cpumask;
8388         /* Used for event triggers */
8389         ret = -ENOMEM;
8390         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8391         if (!temp_buffer)
8392                 goto out_rm_hp_state;
8393
8394         if (trace_create_savedcmd() < 0)
8395                 goto out_free_temp_buffer;
8396
8397         /* TODO: make the number of buffers hot pluggable with CPUS */
8398         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8399                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8400                 WARN_ON(1);
8401                 goto out_free_savedcmd;
8402         }
8403
8404         if (global_trace.buffer_disabled)
8405                 tracing_off();
8406
8407         if (trace_boot_clock) {
8408                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8409                 if (ret < 0)
8410                         pr_warn("Trace clock %s not defined, going back to default\n",
8411                                 trace_boot_clock);
8412         }
8413
8414         /*
8415          * register_tracer() might reference current_trace, so it
8416          * needs to be set before we register anything. This is
8417          * just a bootstrap of current_trace anyway.
8418          */
8419         global_trace.current_trace = &nop_trace;
8420
8421         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8422
8423         ftrace_init_global_array_ops(&global_trace);
8424
8425         init_trace_flags_index(&global_trace);
8426
8427         register_tracer(&nop_trace);
8428
8429         /* Function tracing may start here (via kernel command line) */
8430         init_function_trace();
8431
8432         /* All seems OK, enable tracing */
8433         tracing_disabled = 0;
8434
8435         atomic_notifier_chain_register(&panic_notifier_list,
8436                                        &trace_panic_notifier);
8437
8438         register_die_notifier(&trace_die_notifier);
8439
8440         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8441
8442         INIT_LIST_HEAD(&global_trace.systems);
8443         INIT_LIST_HEAD(&global_trace.events);
8444         list_add(&global_trace.list, &ftrace_trace_arrays);
8445
8446         apply_trace_boot_options();
8447
8448         register_snapshot_cmd();
8449
8450         return 0;
8451
8452 out_free_savedcmd:
8453         free_saved_cmdlines_buffer(savedcmd);
8454 out_free_temp_buffer:
8455         ring_buffer_free(temp_buffer);
8456 out_rm_hp_state:
8457         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8458 out_free_cpumask:
8459         free_cpumask_var(global_trace.tracing_cpumask);
8460 out_free_buffer_mask:
8461         free_cpumask_var(tracing_buffer_mask);
8462 out:
8463         return ret;
8464 }
8465
8466 void __init early_trace_init(void)
8467 {
8468         if (tracepoint_printk) {
8469                 tracepoint_print_iter =
8470                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8471                 if (WARN_ON(!tracepoint_print_iter))
8472                         tracepoint_printk = 0;
8473                 else
8474                         static_key_enable(&tracepoint_printk_key.key);
8475         }
8476         tracer_alloc_buffers();
8477 }
8478
8479 void __init trace_init(void)
8480 {
8481         trace_event_init();
8482 }
8483
8484 __init static int clear_boot_tracer(void)
8485 {
8486         /*
8487          * The default tracer at boot buffer is an init section.
8488          * This function is called in lateinit. If we did not
8489          * find the boot tracer, then clear it out, to prevent
8490          * later registration from accessing the buffer that is
8491          * about to be freed.
8492          */
8493         if (!default_bootup_tracer)
8494                 return 0;
8495
8496         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8497                default_bootup_tracer);
8498         default_bootup_tracer = NULL;
8499
8500         return 0;
8501 }
8502
8503 fs_initcall(tracer_init_tracefs);
8504 late_initcall_sync(clear_boot_tracer);