GNU Linux-libre 5.4.257-gnu1
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48
49 #include "trace.h"
50 #include "trace_output.h"
51
52 /*
53  * On boot up, the ring buffer is set to the minimum size, so that
54  * we do not waste memory on systems that are not using tracing.
55  */
56 bool ring_buffer_expanded;
57
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will lurk into the ring-buffer to count the
61  * entries inserted during the selftest although some concurrent
62  * insertions into the ring-buffer such as trace_printk could occurred
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If a tracer is running, we do not want to run SELFTEST.
69  */
70 bool __read_mostly tracing_selftest_disabled;
71
72 /* Pipe tracepoints to printk */
73 struct trace_iterator *tracepoint_print_iter;
74 int tracepoint_printk;
75 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
76
77 /* For tracers that don't implement custom flags */
78 static struct tracer_opt dummy_tracer_opt[] = {
79         { }
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 cpumask_var_t __read_mostly     tracing_buffer_mask;
104
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputing it to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120
121 enum ftrace_dump_mode ftrace_dump_on_oops;
122
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125
126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
127 /* Map of enums to their values, for "eval_map" file */
128 struct trace_eval_map_head {
129         struct module                   *mod;
130         unsigned long                   length;
131 };
132
133 union trace_eval_map_item;
134
135 struct trace_eval_map_tail {
136         /*
137          * "end" is first and points to NULL as it must be different
138          * than "mod" or "eval_string"
139          */
140         union trace_eval_map_item       *next;
141         const char                      *end;   /* points to NULL */
142 };
143
144 static DEFINE_MUTEX(trace_eval_mutex);
145
146 /*
147  * The trace_eval_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved eval_map items.
152  */
153 union trace_eval_map_item {
154         struct trace_eval_map           map;
155         struct trace_eval_map_head      head;
156         struct trace_eval_map_tail      tail;
157 };
158
159 static union trace_eval_map_item *trace_eval_maps;
160 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
161
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163 static void ftrace_trace_userstack(struct trace_array *tr,
164                                    struct ring_buffer *buffer,
165                                    unsigned long flags, int pc);
166
167 #define MAX_TRACER_SIZE         100
168 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
169 static char *default_bootup_tracer;
170
171 static bool allocate_snapshot;
172
173 static int __init set_cmdline_ftrace(char *str)
174 {
175         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
176         default_bootup_tracer = bootup_tracer_buf;
177         /* We are using ftrace early, expand it */
178         ring_buffer_expanded = true;
179         return 1;
180 }
181 __setup("ftrace=", set_cmdline_ftrace);
182
183 static int __init set_ftrace_dump_on_oops(char *str)
184 {
185         if (*str++ != '=' || !*str) {
186                 ftrace_dump_on_oops = DUMP_ALL;
187                 return 1;
188         }
189
190         if (!strcmp("orig_cpu", str)) {
191                 ftrace_dump_on_oops = DUMP_ORIG;
192                 return 1;
193         }
194
195         return 0;
196 }
197 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
198
199 static int __init stop_trace_on_warning(char *str)
200 {
201         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
202                 __disable_trace_on_warning = 1;
203         return 1;
204 }
205 __setup("traceoff_on_warning", stop_trace_on_warning);
206
207 static int __init boot_alloc_snapshot(char *str)
208 {
209         allocate_snapshot = true;
210         /* We also need the main ring buffer expanded */
211         ring_buffer_expanded = true;
212         return 1;
213 }
214 __setup("alloc_snapshot", boot_alloc_snapshot);
215
216
217 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
218
219 static int __init set_trace_boot_options(char *str)
220 {
221         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
222         return 1;
223 }
224 __setup("trace_options=", set_trace_boot_options);
225
226 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
227 static char *trace_boot_clock __initdata;
228
229 static int __init set_trace_boot_clock(char *str)
230 {
231         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
232         trace_boot_clock = trace_boot_clock_buf;
233         return 1;
234 }
235 __setup("trace_clock=", set_trace_boot_clock);
236
237 static int __init set_tracepoint_printk(char *str)
238 {
239         /* Ignore the "tp_printk_stop_on_boot" param */
240         if (*str == '_')
241                 return 0;
242
243         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
244                 tracepoint_printk = 1;
245         return 1;
246 }
247 __setup("tp_printk", set_tracepoint_printk);
248
249 unsigned long long ns2usecs(u64 nsec)
250 {
251         nsec += 500;
252         do_div(nsec, 1000);
253         return nsec;
254 }
255
256 /* trace_flags holds trace_options default values */
257 #define TRACE_DEFAULT_FLAGS                                             \
258         (FUNCTION_DEFAULT_FLAGS |                                       \
259          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
260          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
261          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
262          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
263
264 /* trace_options that are only supported by global_trace */
265 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
266                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
267
268 /* trace_flags that are default zero for instances */
269 #define ZEROED_TRACE_FLAGS \
270         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
271
272 /*
273  * The global_trace is the descriptor that holds the top-level tracing
274  * buffers for the live tracing.
275  */
276 static struct trace_array global_trace = {
277         .trace_flags = TRACE_DEFAULT_FLAGS,
278 };
279
280 LIST_HEAD(ftrace_trace_arrays);
281
282 int trace_array_get(struct trace_array *this_tr)
283 {
284         struct trace_array *tr;
285         int ret = -ENODEV;
286
287         mutex_lock(&trace_types_lock);
288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
289                 if (tr == this_tr) {
290                         tr->ref++;
291                         ret = 0;
292                         break;
293                 }
294         }
295         mutex_unlock(&trace_types_lock);
296
297         return ret;
298 }
299
300 static void __trace_array_put(struct trace_array *this_tr)
301 {
302         WARN_ON(!this_tr->ref);
303         this_tr->ref--;
304 }
305
306 void trace_array_put(struct trace_array *this_tr)
307 {
308         mutex_lock(&trace_types_lock);
309         __trace_array_put(this_tr);
310         mutex_unlock(&trace_types_lock);
311 }
312
313 int tracing_check_open_get_tr(struct trace_array *tr)
314 {
315         int ret;
316
317         ret = security_locked_down(LOCKDOWN_TRACEFS);
318         if (ret)
319                 return ret;
320
321         if (tracing_disabled)
322                 return -ENODEV;
323
324         if (tr && trace_array_get(tr) < 0)
325                 return -ENODEV;
326
327         return 0;
328 }
329
330 int call_filter_check_discard(struct trace_event_call *call, void *rec,
331                               struct ring_buffer *buffer,
332                               struct ring_buffer_event *event)
333 {
334         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
335             !filter_match_preds(call->filter, rec)) {
336                 __trace_event_discard_commit(buffer, event);
337                 return 1;
338         }
339
340         return 0;
341 }
342
343 void trace_free_pid_list(struct trace_pid_list *pid_list)
344 {
345         vfree(pid_list->pids);
346         kfree(pid_list);
347 }
348
349 /**
350  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
351  * @filtered_pids: The list of pids to check
352  * @search_pid: The PID to find in @filtered_pids
353  *
354  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
355  */
356 bool
357 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
358 {
359         /*
360          * If pid_max changed after filtered_pids was created, we
361          * by default ignore all pids greater than the previous pid_max.
362          */
363         if (search_pid >= filtered_pids->pid_max)
364                 return false;
365
366         return test_bit(search_pid, filtered_pids->pids);
367 }
368
369 /**
370  * trace_ignore_this_task - should a task be ignored for tracing
371  * @filtered_pids: The list of pids to check
372  * @task: The task that should be ignored if not filtered
373  *
374  * Checks if @task should be traced or not from @filtered_pids.
375  * Returns true if @task should *NOT* be traced.
376  * Returns false if @task should be traced.
377  */
378 bool
379 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
380 {
381         /*
382          * Return false, because if filtered_pids does not exist,
383          * all pids are good to trace.
384          */
385         if (!filtered_pids)
386                 return false;
387
388         return !trace_find_filtered_pid(filtered_pids, task->pid);
389 }
390
391 /**
392  * trace_filter_add_remove_task - Add or remove a task from a pid_list
393  * @pid_list: The list to modify
394  * @self: The current task for fork or NULL for exit
395  * @task: The task to add or remove
396  *
397  * If adding a task, if @self is defined, the task is only added if @self
398  * is also included in @pid_list. This happens on fork and tasks should
399  * only be added when the parent is listed. If @self is NULL, then the
400  * @task pid will be removed from the list, which would happen on exit
401  * of a task.
402  */
403 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
404                                   struct task_struct *self,
405                                   struct task_struct *task)
406 {
407         if (!pid_list)
408                 return;
409
410         /* For forks, we only add if the forking task is listed */
411         if (self) {
412                 if (!trace_find_filtered_pid(pid_list, self->pid))
413                         return;
414         }
415
416         /* Sorry, but we don't support pid_max changing after setting */
417         if (task->pid >= pid_list->pid_max)
418                 return;
419
420         /* "self" is set for forks, and NULL for exits */
421         if (self)
422                 set_bit(task->pid, pid_list->pids);
423         else
424                 clear_bit(task->pid, pid_list->pids);
425 }
426
427 /**
428  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
429  * @pid_list: The pid list to show
430  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
431  * @pos: The position of the file
432  *
433  * This is used by the seq_file "next" operation to iterate the pids
434  * listed in a trace_pid_list structure.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
440 {
441         unsigned long pid = (unsigned long)v;
442
443         (*pos)++;
444
445         /* pid already is +1 of the actual prevous bit */
446         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
447
448         /* Return pid + 1 to allow zero to be represented */
449         if (pid < pid_list->pid_max)
450                 return (void *)(pid + 1);
451
452         return NULL;
453 }
454
455 /**
456  * trace_pid_start - Used for seq_file to start reading pid lists
457  * @pid_list: The pid list to show
458  * @pos: The position of the file
459  *
460  * This is used by seq_file "start" operation to start the iteration
461  * of listing pids.
462  *
463  * Returns the pid+1 as we want to display pid of zero, but NULL would
464  * stop the iteration.
465  */
466 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
467 {
468         unsigned long pid;
469         loff_t l = 0;
470
471         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
472         if (pid >= pid_list->pid_max)
473                 return NULL;
474
475         /* Return pid + 1 so that zero can be the exit value */
476         for (pid++; pid && l < *pos;
477              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
478                 ;
479         return (void *)pid;
480 }
481
482 /**
483  * trace_pid_show - show the current pid in seq_file processing
484  * @m: The seq_file structure to write into
485  * @v: A void pointer of the pid (+1) value to display
486  *
487  * Can be directly used by seq_file operations to display the current
488  * pid value.
489  */
490 int trace_pid_show(struct seq_file *m, void *v)
491 {
492         unsigned long pid = (unsigned long)v - 1;
493
494         seq_printf(m, "%lu\n", pid);
495         return 0;
496 }
497
498 /* 128 should be much more than enough */
499 #define PID_BUF_SIZE            127
500
501 int trace_pid_write(struct trace_pid_list *filtered_pids,
502                     struct trace_pid_list **new_pid_list,
503                     const char __user *ubuf, size_t cnt)
504 {
505         struct trace_pid_list *pid_list;
506         struct trace_parser parser;
507         unsigned long val;
508         int nr_pids = 0;
509         ssize_t read = 0;
510         ssize_t ret = 0;
511         loff_t pos;
512         pid_t pid;
513
514         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
515                 return -ENOMEM;
516
517         /*
518          * Always recreate a new array. The write is an all or nothing
519          * operation. Always create a new array when adding new pids by
520          * the user. If the operation fails, then the current list is
521          * not modified.
522          */
523         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
524         if (!pid_list) {
525                 trace_parser_put(&parser);
526                 return -ENOMEM;
527         }
528
529         pid_list->pid_max = READ_ONCE(pid_max);
530
531         /* Only truncating will shrink pid_max */
532         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
533                 pid_list->pid_max = filtered_pids->pid_max;
534
535         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
536         if (!pid_list->pids) {
537                 trace_parser_put(&parser);
538                 kfree(pid_list);
539                 return -ENOMEM;
540         }
541
542         if (filtered_pids) {
543                 /* copy the current bits to the new max */
544                 for_each_set_bit(pid, filtered_pids->pids,
545                                  filtered_pids->pid_max) {
546                         set_bit(pid, pid_list->pids);
547                         nr_pids++;
548                 }
549         }
550
551         while (cnt > 0) {
552
553                 pos = 0;
554
555                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
556                 if (ret < 0 || !trace_parser_loaded(&parser))
557                         break;
558
559                 read += ret;
560                 ubuf += ret;
561                 cnt -= ret;
562
563                 ret = -EINVAL;
564                 if (kstrtoul(parser.buffer, 0, &val))
565                         break;
566                 if (val >= pid_list->pid_max)
567                         break;
568
569                 pid = (pid_t)val;
570
571                 set_bit(pid, pid_list->pids);
572                 nr_pids++;
573
574                 trace_parser_clear(&parser);
575                 ret = 0;
576         }
577         trace_parser_put(&parser);
578
579         if (ret < 0) {
580                 trace_free_pid_list(pid_list);
581                 return ret;
582         }
583
584         if (!nr_pids) {
585                 /* Cleared the list of pids */
586                 trace_free_pid_list(pid_list);
587                 read = ret;
588                 pid_list = NULL;
589         }
590
591         *new_pid_list = pid_list;
592
593         return read;
594 }
595
596 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
597 {
598         u64 ts;
599
600         /* Early boot up does not have a buffer yet */
601         if (!buf->buffer)
602                 return trace_clock_local();
603
604         ts = ring_buffer_time_stamp(buf->buffer, cpu);
605         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
606
607         return ts;
608 }
609
610 u64 ftrace_now(int cpu)
611 {
612         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
613 }
614
615 /**
616  * tracing_is_enabled - Show if global_trace has been disabled
617  *
618  * Shows if the global trace has been enabled or not. It uses the
619  * mirror flag "buffer_disabled" to be used in fast paths such as for
620  * the irqsoff tracer. But it may be inaccurate due to races. If you
621  * need to know the accurate state, use tracing_is_on() which is a little
622  * slower, but accurate.
623  */
624 int tracing_is_enabled(void)
625 {
626         /*
627          * For quick access (irqsoff uses this in fast path), just
628          * return the mirror variable of the state of the ring buffer.
629          * It's a little racy, but we don't really care.
630          */
631         smp_rmb();
632         return !global_trace.buffer_disabled;
633 }
634
635 /*
636  * trace_buf_size is the size in bytes that is allocated
637  * for a buffer. Note, the number of bytes is always rounded
638  * to page size.
639  *
640  * This number is purposely set to a low number of 16384.
641  * If the dump on oops happens, it will be much appreciated
642  * to not have to wait for all that output. Anyway this can be
643  * boot time and run time configurable.
644  */
645 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
646
647 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
648
649 /* trace_types holds a link list of available tracers. */
650 static struct tracer            *trace_types __read_mostly;
651
652 /*
653  * trace_types_lock is used to protect the trace_types list.
654  */
655 DEFINE_MUTEX(trace_types_lock);
656
657 /*
658  * serialize the access of the ring buffer
659  *
660  * ring buffer serializes readers, but it is low level protection.
661  * The validity of the events (which returns by ring_buffer_peek() ..etc)
662  * are not protected by ring buffer.
663  *
664  * The content of events may become garbage if we allow other process consumes
665  * these events concurrently:
666  *   A) the page of the consumed events may become a normal page
667  *      (not reader page) in ring buffer, and this page will be rewrited
668  *      by events producer.
669  *   B) The page of the consumed events may become a page for splice_read,
670  *      and this page will be returned to system.
671  *
672  * These primitives allow multi process access to different cpu ring buffer
673  * concurrently.
674  *
675  * These primitives don't distinguish read-only and read-consume access.
676  * Multi read-only access are also serialized.
677  */
678
679 #ifdef CONFIG_SMP
680 static DECLARE_RWSEM(all_cpu_access_lock);
681 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
682
683 static inline void trace_access_lock(int cpu)
684 {
685         if (cpu == RING_BUFFER_ALL_CPUS) {
686                 /* gain it for accessing the whole ring buffer. */
687                 down_write(&all_cpu_access_lock);
688         } else {
689                 /* gain it for accessing a cpu ring buffer. */
690
691                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
692                 down_read(&all_cpu_access_lock);
693
694                 /* Secondly block other access to this @cpu ring buffer. */
695                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
696         }
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 up_write(&all_cpu_access_lock);
703         } else {
704                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
705                 up_read(&all_cpu_access_lock);
706         }
707 }
708
709 static inline void trace_access_lock_init(void)
710 {
711         int cpu;
712
713         for_each_possible_cpu(cpu)
714                 mutex_init(&per_cpu(cpu_access_lock, cpu));
715 }
716
717 #else
718
719 static DEFINE_MUTEX(access_lock);
720
721 static inline void trace_access_lock(int cpu)
722 {
723         (void)cpu;
724         mutex_lock(&access_lock);
725 }
726
727 static inline void trace_access_unlock(int cpu)
728 {
729         (void)cpu;
730         mutex_unlock(&access_lock);
731 }
732
733 static inline void trace_access_lock_init(void)
734 {
735 }
736
737 #endif
738
739 #ifdef CONFIG_STACKTRACE
740 static void __ftrace_trace_stack(struct ring_buffer *buffer,
741                                  unsigned long flags,
742                                  int skip, int pc, struct pt_regs *regs);
743 static inline void ftrace_trace_stack(struct trace_array *tr,
744                                       struct ring_buffer *buffer,
745                                       unsigned long flags,
746                                       int skip, int pc, struct pt_regs *regs);
747
748 #else
749 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
750                                         unsigned long flags,
751                                         int skip, int pc, struct pt_regs *regs)
752 {
753 }
754 static inline void ftrace_trace_stack(struct trace_array *tr,
755                                       struct ring_buffer *buffer,
756                                       unsigned long flags,
757                                       int skip, int pc, struct pt_regs *regs)
758 {
759 }
760
761 #endif
762
763 static __always_inline void
764 trace_event_setup(struct ring_buffer_event *event,
765                   int type, unsigned long flags, int pc)
766 {
767         struct trace_entry *ent = ring_buffer_event_data(event);
768
769         tracing_generic_entry_update(ent, type, flags, pc);
770 }
771
772 static __always_inline struct ring_buffer_event *
773 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
774                           int type,
775                           unsigned long len,
776                           unsigned long flags, int pc)
777 {
778         struct ring_buffer_event *event;
779
780         event = ring_buffer_lock_reserve(buffer, len);
781         if (event != NULL)
782                 trace_event_setup(event, type, flags, pc);
783
784         return event;
785 }
786
787 void tracer_tracing_on(struct trace_array *tr)
788 {
789         if (tr->trace_buffer.buffer)
790                 ring_buffer_record_on(tr->trace_buffer.buffer);
791         /*
792          * This flag is looked at when buffers haven't been allocated
793          * yet, or by some tracers (like irqsoff), that just want to
794          * know if the ring buffer has been disabled, but it can handle
795          * races of where it gets disabled but we still do a record.
796          * As the check is in the fast path of the tracers, it is more
797          * important to be fast than accurate.
798          */
799         tr->buffer_disabled = 0;
800         /* Make the flag seen by readers */
801         smp_wmb();
802 }
803
804 /**
805  * tracing_on - enable tracing buffers
806  *
807  * This function enables tracing buffers that may have been
808  * disabled with tracing_off.
809  */
810 void tracing_on(void)
811 {
812         tracer_tracing_on(&global_trace);
813 }
814 EXPORT_SYMBOL_GPL(tracing_on);
815
816
817 static __always_inline void
818 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
819 {
820         __this_cpu_write(trace_taskinfo_save, true);
821
822         /* If this is the temp buffer, we need to commit fully */
823         if (this_cpu_read(trace_buffered_event) == event) {
824                 /* Length is in event->array[0] */
825                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
826                 /* Release the temp buffer */
827                 this_cpu_dec(trace_buffered_event_cnt);
828         } else
829                 ring_buffer_unlock_commit(buffer, event);
830 }
831
832 /**
833  * __trace_puts - write a constant string into the trace buffer.
834  * @ip:    The address of the caller
835  * @str:   The constant string to write
836  * @size:  The size of the string.
837  */
838 int __trace_puts(unsigned long ip, const char *str, int size)
839 {
840         struct ring_buffer_event *event;
841         struct ring_buffer *buffer;
842         struct print_entry *entry;
843         unsigned long irq_flags;
844         int alloc;
845         int pc;
846
847         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
848                 return 0;
849
850         pc = preempt_count();
851
852         if (unlikely(tracing_selftest_running || tracing_disabled))
853                 return 0;
854
855         alloc = sizeof(*entry) + size + 2; /* possible \n added */
856
857         local_save_flags(irq_flags);
858         buffer = global_trace.trace_buffer.buffer;
859         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
860                                             irq_flags, pc);
861         if (!event)
862                 return 0;
863
864         entry = ring_buffer_event_data(event);
865         entry->ip = ip;
866
867         memcpy(&entry->buf, str, size);
868
869         /* Add a newline if necessary */
870         if (entry->buf[size - 1] != '\n') {
871                 entry->buf[size] = '\n';
872                 entry->buf[size + 1] = '\0';
873         } else
874                 entry->buf[size] = '\0';
875
876         __buffer_unlock_commit(buffer, event);
877         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
878
879         return size;
880 }
881 EXPORT_SYMBOL_GPL(__trace_puts);
882
883 /**
884  * __trace_bputs - write the pointer to a constant string into trace buffer
885  * @ip:    The address of the caller
886  * @str:   The constant string to write to the buffer to
887  */
888 int __trace_bputs(unsigned long ip, const char *str)
889 {
890         struct ring_buffer_event *event;
891         struct ring_buffer *buffer;
892         struct bputs_entry *entry;
893         unsigned long irq_flags;
894         int size = sizeof(struct bputs_entry);
895         int pc;
896
897         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
898                 return 0;
899
900         pc = preempt_count();
901
902         if (unlikely(tracing_selftest_running || tracing_disabled))
903                 return 0;
904
905         local_save_flags(irq_flags);
906         buffer = global_trace.trace_buffer.buffer;
907         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
908                                             irq_flags, pc);
909         if (!event)
910                 return 0;
911
912         entry = ring_buffer_event_data(event);
913         entry->ip                       = ip;
914         entry->str                      = str;
915
916         __buffer_unlock_commit(buffer, event);
917         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
918
919         return 1;
920 }
921 EXPORT_SYMBOL_GPL(__trace_bputs);
922
923 #ifdef CONFIG_TRACER_SNAPSHOT
924 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
925 {
926         struct tracer *tracer = tr->current_trace;
927         unsigned long flags;
928
929         if (in_nmi()) {
930                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
931                 internal_trace_puts("*** snapshot is being ignored        ***\n");
932                 return;
933         }
934
935         if (!tr->allocated_snapshot) {
936                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
937                 internal_trace_puts("*** stopping trace here!   ***\n");
938                 tracing_off();
939                 return;
940         }
941
942         /* Note, snapshot can not be used when the tracer uses it */
943         if (tracer->use_max_tr) {
944                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
945                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
946                 return;
947         }
948
949         local_irq_save(flags);
950         update_max_tr(tr, current, smp_processor_id(), cond_data);
951         local_irq_restore(flags);
952 }
953
954 void tracing_snapshot_instance(struct trace_array *tr)
955 {
956         tracing_snapshot_instance_cond(tr, NULL);
957 }
958
959 /**
960  * tracing_snapshot - take a snapshot of the current buffer.
961  *
962  * This causes a swap between the snapshot buffer and the current live
963  * tracing buffer. You can use this to take snapshots of the live
964  * trace when some condition is triggered, but continue to trace.
965  *
966  * Note, make sure to allocate the snapshot with either
967  * a tracing_snapshot_alloc(), or by doing it manually
968  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
969  *
970  * If the snapshot buffer is not allocated, it will stop tracing.
971  * Basically making a permanent snapshot.
972  */
973 void tracing_snapshot(void)
974 {
975         struct trace_array *tr = &global_trace;
976
977         tracing_snapshot_instance(tr);
978 }
979 EXPORT_SYMBOL_GPL(tracing_snapshot);
980
981 /**
982  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
983  * @tr:         The tracing instance to snapshot
984  * @cond_data:  The data to be tested conditionally, and possibly saved
985  *
986  * This is the same as tracing_snapshot() except that the snapshot is
987  * conditional - the snapshot will only happen if the
988  * cond_snapshot.update() implementation receiving the cond_data
989  * returns true, which means that the trace array's cond_snapshot
990  * update() operation used the cond_data to determine whether the
991  * snapshot should be taken, and if it was, presumably saved it along
992  * with the snapshot.
993  */
994 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
995 {
996         tracing_snapshot_instance_cond(tr, cond_data);
997 }
998 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
999
1000 /**
1001  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1002  * @tr:         The tracing instance
1003  *
1004  * When the user enables a conditional snapshot using
1005  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1006  * with the snapshot.  This accessor is used to retrieve it.
1007  *
1008  * Should not be called from cond_snapshot.update(), since it takes
1009  * the tr->max_lock lock, which the code calling
1010  * cond_snapshot.update() has already done.
1011  *
1012  * Returns the cond_data associated with the trace array's snapshot.
1013  */
1014 void *tracing_cond_snapshot_data(struct trace_array *tr)
1015 {
1016         void *cond_data = NULL;
1017
1018         local_irq_disable();
1019         arch_spin_lock(&tr->max_lock);
1020
1021         if (tr->cond_snapshot)
1022                 cond_data = tr->cond_snapshot->cond_data;
1023
1024         arch_spin_unlock(&tr->max_lock);
1025         local_irq_enable();
1026
1027         return cond_data;
1028 }
1029 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1030
1031 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1032                                         struct trace_buffer *size_buf, int cpu_id);
1033 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1034
1035 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1036 {
1037         int ret;
1038
1039         if (!tr->allocated_snapshot) {
1040
1041                 /* allocate spare buffer */
1042                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1043                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1044                 if (ret < 0)
1045                         return ret;
1046
1047                 tr->allocated_snapshot = true;
1048         }
1049
1050         return 0;
1051 }
1052
1053 static void free_snapshot(struct trace_array *tr)
1054 {
1055         /*
1056          * We don't free the ring buffer. instead, resize it because
1057          * The max_tr ring buffer has some state (e.g. ring->clock) and
1058          * we want preserve it.
1059          */
1060         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1061         set_buffer_entries(&tr->max_buffer, 1);
1062         tracing_reset_online_cpus(&tr->max_buffer);
1063         tr->allocated_snapshot = false;
1064 }
1065
1066 /**
1067  * tracing_alloc_snapshot - allocate snapshot buffer.
1068  *
1069  * This only allocates the snapshot buffer if it isn't already
1070  * allocated - it doesn't also take a snapshot.
1071  *
1072  * This is meant to be used in cases where the snapshot buffer needs
1073  * to be set up for events that can't sleep but need to be able to
1074  * trigger a snapshot.
1075  */
1076 int tracing_alloc_snapshot(void)
1077 {
1078         struct trace_array *tr = &global_trace;
1079         int ret;
1080
1081         ret = tracing_alloc_snapshot_instance(tr);
1082         WARN_ON(ret < 0);
1083
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1087
1088 /**
1089  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1090  *
1091  * This is similar to tracing_snapshot(), but it will allocate the
1092  * snapshot buffer if it isn't already allocated. Use this only
1093  * where it is safe to sleep, as the allocation may sleep.
1094  *
1095  * This causes a swap between the snapshot buffer and the current live
1096  * tracing buffer. You can use this to take snapshots of the live
1097  * trace when some condition is triggered, but continue to trace.
1098  */
1099 void tracing_snapshot_alloc(void)
1100 {
1101         int ret;
1102
1103         ret = tracing_alloc_snapshot();
1104         if (ret < 0)
1105                 return;
1106
1107         tracing_snapshot();
1108 }
1109 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1110
1111 /**
1112  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1113  * @tr:         The tracing instance
1114  * @cond_data:  User data to associate with the snapshot
1115  * @update:     Implementation of the cond_snapshot update function
1116  *
1117  * Check whether the conditional snapshot for the given instance has
1118  * already been enabled, or if the current tracer is already using a
1119  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1120  * save the cond_data and update function inside.
1121  *
1122  * Returns 0 if successful, error otherwise.
1123  */
1124 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1125                                  cond_update_fn_t update)
1126 {
1127         struct cond_snapshot *cond_snapshot;
1128         int ret = 0;
1129
1130         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1131         if (!cond_snapshot)
1132                 return -ENOMEM;
1133
1134         cond_snapshot->cond_data = cond_data;
1135         cond_snapshot->update = update;
1136
1137         mutex_lock(&trace_types_lock);
1138
1139         ret = tracing_alloc_snapshot_instance(tr);
1140         if (ret)
1141                 goto fail_unlock;
1142
1143         if (tr->current_trace->use_max_tr) {
1144                 ret = -EBUSY;
1145                 goto fail_unlock;
1146         }
1147
1148         /*
1149          * The cond_snapshot can only change to NULL without the
1150          * trace_types_lock. We don't care if we race with it going
1151          * to NULL, but we want to make sure that it's not set to
1152          * something other than NULL when we get here, which we can
1153          * do safely with only holding the trace_types_lock and not
1154          * having to take the max_lock.
1155          */
1156         if (tr->cond_snapshot) {
1157                 ret = -EBUSY;
1158                 goto fail_unlock;
1159         }
1160
1161         local_irq_disable();
1162         arch_spin_lock(&tr->max_lock);
1163         tr->cond_snapshot = cond_snapshot;
1164         arch_spin_unlock(&tr->max_lock);
1165         local_irq_enable();
1166
1167         mutex_unlock(&trace_types_lock);
1168
1169         return ret;
1170
1171  fail_unlock:
1172         mutex_unlock(&trace_types_lock);
1173         kfree(cond_snapshot);
1174         return ret;
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1177
1178 /**
1179  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1180  * @tr:         The tracing instance
1181  *
1182  * Check whether the conditional snapshot for the given instance is
1183  * enabled; if so, free the cond_snapshot associated with it,
1184  * otherwise return -EINVAL.
1185  *
1186  * Returns 0 if successful, error otherwise.
1187  */
1188 int tracing_snapshot_cond_disable(struct trace_array *tr)
1189 {
1190         int ret = 0;
1191
1192         local_irq_disable();
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (!tr->cond_snapshot)
1196                 ret = -EINVAL;
1197         else {
1198                 kfree(tr->cond_snapshot);
1199                 tr->cond_snapshot = NULL;
1200         }
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return ret;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1208 #else
1209 void tracing_snapshot(void)
1210 {
1211         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1212 }
1213 EXPORT_SYMBOL_GPL(tracing_snapshot);
1214 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1215 {
1216         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1217 }
1218 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1219 int tracing_alloc_snapshot(void)
1220 {
1221         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1222         return -ENODEV;
1223 }
1224 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1225 void tracing_snapshot_alloc(void)
1226 {
1227         /* Give warning */
1228         tracing_snapshot();
1229 }
1230 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1231 void *tracing_cond_snapshot_data(struct trace_array *tr)
1232 {
1233         return NULL;
1234 }
1235 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1236 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1237 {
1238         return -ENODEV;
1239 }
1240 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1241 int tracing_snapshot_cond_disable(struct trace_array *tr)
1242 {
1243         return false;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1246 #endif /* CONFIG_TRACER_SNAPSHOT */
1247
1248 void tracer_tracing_off(struct trace_array *tr)
1249 {
1250         if (tr->trace_buffer.buffer)
1251                 ring_buffer_record_off(tr->trace_buffer.buffer);
1252         /*
1253          * This flag is looked at when buffers haven't been allocated
1254          * yet, or by some tracers (like irqsoff), that just want to
1255          * know if the ring buffer has been disabled, but it can handle
1256          * races of where it gets disabled but we still do a record.
1257          * As the check is in the fast path of the tracers, it is more
1258          * important to be fast than accurate.
1259          */
1260         tr->buffer_disabled = 1;
1261         /* Make the flag seen by readers */
1262         smp_wmb();
1263 }
1264
1265 /**
1266  * tracing_off - turn off tracing buffers
1267  *
1268  * This function stops the tracing buffers from recording data.
1269  * It does not disable any overhead the tracers themselves may
1270  * be causing. This function simply causes all recording to
1271  * the ring buffers to fail.
1272  */
1273 void tracing_off(void)
1274 {
1275         tracer_tracing_off(&global_trace);
1276 }
1277 EXPORT_SYMBOL_GPL(tracing_off);
1278
1279 void disable_trace_on_warning(void)
1280 {
1281         if (__disable_trace_on_warning)
1282                 tracing_off();
1283 }
1284
1285 /**
1286  * tracer_tracing_is_on - show real state of ring buffer enabled
1287  * @tr : the trace array to know if ring buffer is enabled
1288  *
1289  * Shows real state of the ring buffer if it is enabled or not.
1290  */
1291 bool tracer_tracing_is_on(struct trace_array *tr)
1292 {
1293         if (tr->trace_buffer.buffer)
1294                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1295         return !tr->buffer_disabled;
1296 }
1297
1298 /**
1299  * tracing_is_on - show state of ring buffers enabled
1300  */
1301 int tracing_is_on(void)
1302 {
1303         return tracer_tracing_is_on(&global_trace);
1304 }
1305 EXPORT_SYMBOL_GPL(tracing_is_on);
1306
1307 static int __init set_buf_size(char *str)
1308 {
1309         unsigned long buf_size;
1310
1311         if (!str)
1312                 return 0;
1313         buf_size = memparse(str, &str);
1314         /*
1315          * nr_entries can not be zero and the startup
1316          * tests require some buffer space. Therefore
1317          * ensure we have at least 4096 bytes of buffer.
1318          */
1319         trace_buf_size = max(4096UL, buf_size);
1320         return 1;
1321 }
1322 __setup("trace_buf_size=", set_buf_size);
1323
1324 static int __init set_tracing_thresh(char *str)
1325 {
1326         unsigned long threshold;
1327         int ret;
1328
1329         if (!str)
1330                 return 0;
1331         ret = kstrtoul(str, 0, &threshold);
1332         if (ret < 0)
1333                 return 0;
1334         tracing_thresh = threshold * 1000;
1335         return 1;
1336 }
1337 __setup("tracing_thresh=", set_tracing_thresh);
1338
1339 unsigned long nsecs_to_usecs(unsigned long nsecs)
1340 {
1341         return nsecs / 1000;
1342 }
1343
1344 /*
1345  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1346  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1347  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1348  * of strings in the order that the evals (enum) were defined.
1349  */
1350 #undef C
1351 #define C(a, b) b
1352
1353 /* These must match the bit postions in trace_iterator_flags */
1354 static const char *trace_options[] = {
1355         TRACE_FLAGS
1356         NULL
1357 };
1358
1359 static struct {
1360         u64 (*func)(void);
1361         const char *name;
1362         int in_ns;              /* is this clock in nanoseconds? */
1363 } trace_clocks[] = {
1364         { trace_clock_local,            "local",        1 },
1365         { trace_clock_global,           "global",       1 },
1366         { trace_clock_counter,          "counter",      0 },
1367         { trace_clock_jiffies,          "uptime",       0 },
1368         { trace_clock,                  "perf",         1 },
1369         { ktime_get_mono_fast_ns,       "mono",         1 },
1370         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1371         { ktime_get_boot_fast_ns,       "boot",         1 },
1372         ARCH_TRACE_CLOCKS
1373 };
1374
1375 bool trace_clock_in_ns(struct trace_array *tr)
1376 {
1377         if (trace_clocks[tr->clock_id].in_ns)
1378                 return true;
1379
1380         return false;
1381 }
1382
1383 /*
1384  * trace_parser_get_init - gets the buffer for trace parser
1385  */
1386 int trace_parser_get_init(struct trace_parser *parser, int size)
1387 {
1388         memset(parser, 0, sizeof(*parser));
1389
1390         parser->buffer = kmalloc(size, GFP_KERNEL);
1391         if (!parser->buffer)
1392                 return 1;
1393
1394         parser->size = size;
1395         return 0;
1396 }
1397
1398 /*
1399  * trace_parser_put - frees the buffer for trace parser
1400  */
1401 void trace_parser_put(struct trace_parser *parser)
1402 {
1403         kfree(parser->buffer);
1404         parser->buffer = NULL;
1405 }
1406
1407 /*
1408  * trace_get_user - reads the user input string separated by  space
1409  * (matched by isspace(ch))
1410  *
1411  * For each string found the 'struct trace_parser' is updated,
1412  * and the function returns.
1413  *
1414  * Returns number of bytes read.
1415  *
1416  * See kernel/trace/trace.h for 'struct trace_parser' details.
1417  */
1418 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1419         size_t cnt, loff_t *ppos)
1420 {
1421         char ch;
1422         size_t read = 0;
1423         ssize_t ret;
1424
1425         if (!*ppos)
1426                 trace_parser_clear(parser);
1427
1428         ret = get_user(ch, ubuf++);
1429         if (ret)
1430                 goto out;
1431
1432         read++;
1433         cnt--;
1434
1435         /*
1436          * The parser is not finished with the last write,
1437          * continue reading the user input without skipping spaces.
1438          */
1439         if (!parser->cont) {
1440                 /* skip white space */
1441                 while (cnt && isspace(ch)) {
1442                         ret = get_user(ch, ubuf++);
1443                         if (ret)
1444                                 goto out;
1445                         read++;
1446                         cnt--;
1447                 }
1448
1449                 parser->idx = 0;
1450
1451                 /* only spaces were written */
1452                 if (isspace(ch) || !ch) {
1453                         *ppos += read;
1454                         ret = read;
1455                         goto out;
1456                 }
1457         }
1458
1459         /* read the non-space input */
1460         while (cnt && !isspace(ch) && ch) {
1461                 if (parser->idx < parser->size - 1)
1462                         parser->buffer[parser->idx++] = ch;
1463                 else {
1464                         ret = -EINVAL;
1465                         goto out;
1466                 }
1467                 ret = get_user(ch, ubuf++);
1468                 if (ret)
1469                         goto out;
1470                 read++;
1471                 cnt--;
1472         }
1473
1474         /* We either got finished input or we have to wait for another call. */
1475         if (isspace(ch) || !ch) {
1476                 parser->buffer[parser->idx] = 0;
1477                 parser->cont = false;
1478         } else if (parser->idx < parser->size - 1) {
1479                 parser->cont = true;
1480                 parser->buffer[parser->idx++] = ch;
1481                 /* Make sure the parsed string always terminates with '\0'. */
1482                 parser->buffer[parser->idx] = 0;
1483         } else {
1484                 ret = -EINVAL;
1485                 goto out;
1486         }
1487
1488         *ppos += read;
1489         ret = read;
1490
1491 out:
1492         return ret;
1493 }
1494
1495 /* TODO add a seq_buf_to_buffer() */
1496 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1497 {
1498         int len;
1499
1500         if (trace_seq_used(s) <= s->seq.readpos)
1501                 return -EBUSY;
1502
1503         len = trace_seq_used(s) - s->seq.readpos;
1504         if (cnt > len)
1505                 cnt = len;
1506         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1507
1508         s->seq.readpos += cnt;
1509         return cnt;
1510 }
1511
1512 unsigned long __read_mostly     tracing_thresh;
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515 /*
1516  * Copy the new maximum trace into the separate maximum-trace
1517  * structure. (this way the maximum trace is permanently saved,
1518  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1519  */
1520 static void
1521 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1522 {
1523         struct trace_buffer *trace_buf = &tr->trace_buffer;
1524         struct trace_buffer *max_buf = &tr->max_buffer;
1525         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1526         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1527
1528         max_buf->cpu = cpu;
1529         max_buf->time_start = data->preempt_timestamp;
1530
1531         max_data->saved_latency = tr->max_latency;
1532         max_data->critical_start = data->critical_start;
1533         max_data->critical_end = data->critical_end;
1534
1535         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1536         max_data->pid = tsk->pid;
1537         /*
1538          * If tsk == current, then use current_uid(), as that does not use
1539          * RCU. The irq tracer can be called out of RCU scope.
1540          */
1541         if (tsk == current)
1542                 max_data->uid = current_uid();
1543         else
1544                 max_data->uid = task_uid(tsk);
1545
1546         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1547         max_data->policy = tsk->policy;
1548         max_data->rt_priority = tsk->rt_priority;
1549
1550         /* record this tasks comm */
1551         tracing_record_cmdline(tsk);
1552 }
1553
1554 /**
1555  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1556  * @tr: tracer
1557  * @tsk: the task with the latency
1558  * @cpu: The cpu that initiated the trace.
1559  * @cond_data: User data associated with a conditional snapshot
1560  *
1561  * Flip the buffers between the @tr and the max_tr and record information
1562  * about which task was the cause of this latency.
1563  */
1564 void
1565 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1566               void *cond_data)
1567 {
1568         if (tr->stop_count)
1569                 return;
1570
1571         WARN_ON_ONCE(!irqs_disabled());
1572
1573         if (!tr->allocated_snapshot) {
1574                 /* Only the nop tracer should hit this when disabling */
1575                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1576                 return;
1577         }
1578
1579         arch_spin_lock(&tr->max_lock);
1580
1581         /* Inherit the recordable setting from trace_buffer */
1582         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1583                 ring_buffer_record_on(tr->max_buffer.buffer);
1584         else
1585                 ring_buffer_record_off(tr->max_buffer.buffer);
1586
1587 #ifdef CONFIG_TRACER_SNAPSHOT
1588         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1589                 goto out_unlock;
1590 #endif
1591         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1592
1593         __update_max_tr(tr, tsk, cpu);
1594
1595  out_unlock:
1596         arch_spin_unlock(&tr->max_lock);
1597 }
1598
1599 /**
1600  * update_max_tr_single - only copy one trace over, and reset the rest
1601  * @tr: tracer
1602  * @tsk: task with the latency
1603  * @cpu: the cpu of the buffer to copy.
1604  *
1605  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1606  */
1607 void
1608 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1609 {
1610         int ret;
1611
1612         if (tr->stop_count)
1613                 return;
1614
1615         WARN_ON_ONCE(!irqs_disabled());
1616         if (!tr->allocated_snapshot) {
1617                 /* Only the nop tracer should hit this when disabling */
1618                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1619                 return;
1620         }
1621
1622         arch_spin_lock(&tr->max_lock);
1623
1624         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1625
1626         if (ret == -EBUSY) {
1627                 /*
1628                  * We failed to swap the buffer due to a commit taking
1629                  * place on this CPU. We fail to record, but we reset
1630                  * the max trace buffer (no one writes directly to it)
1631                  * and flag that it failed.
1632                  */
1633                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1634                         "Failed to swap buffers due to commit in progress\n");
1635         }
1636
1637         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1638
1639         __update_max_tr(tr, tsk, cpu);
1640         arch_spin_unlock(&tr->max_lock);
1641 }
1642 #endif /* CONFIG_TRACER_MAX_TRACE */
1643
1644 static int wait_on_pipe(struct trace_iterator *iter, int full)
1645 {
1646         /* Iterators are static, they should be filled or empty */
1647         if (trace_buffer_iter(iter, iter->cpu_file))
1648                 return 0;
1649
1650         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1651                                 full);
1652 }
1653
1654 #ifdef CONFIG_FTRACE_STARTUP_TEST
1655 static bool selftests_can_run;
1656
1657 struct trace_selftests {
1658         struct list_head                list;
1659         struct tracer                   *type;
1660 };
1661
1662 static LIST_HEAD(postponed_selftests);
1663
1664 static int save_selftest(struct tracer *type)
1665 {
1666         struct trace_selftests *selftest;
1667
1668         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1669         if (!selftest)
1670                 return -ENOMEM;
1671
1672         selftest->type = type;
1673         list_add(&selftest->list, &postponed_selftests);
1674         return 0;
1675 }
1676
1677 static int run_tracer_selftest(struct tracer *type)
1678 {
1679         struct trace_array *tr = &global_trace;
1680         struct tracer *saved_tracer = tr->current_trace;
1681         int ret;
1682
1683         if (!type->selftest || tracing_selftest_disabled)
1684                 return 0;
1685
1686         /*
1687          * If a tracer registers early in boot up (before scheduling is
1688          * initialized and such), then do not run its selftests yet.
1689          * Instead, run it a little later in the boot process.
1690          */
1691         if (!selftests_can_run)
1692                 return save_selftest(type);
1693
1694         /*
1695          * Run a selftest on this tracer.
1696          * Here we reset the trace buffer, and set the current
1697          * tracer to be this tracer. The tracer can then run some
1698          * internal tracing to verify that everything is in order.
1699          * If we fail, we do not register this tracer.
1700          */
1701         tracing_reset_online_cpus(&tr->trace_buffer);
1702
1703         tr->current_trace = type;
1704
1705 #ifdef CONFIG_TRACER_MAX_TRACE
1706         if (type->use_max_tr) {
1707                 /* If we expanded the buffers, make sure the max is expanded too */
1708                 if (ring_buffer_expanded)
1709                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1710                                            RING_BUFFER_ALL_CPUS);
1711                 tr->allocated_snapshot = true;
1712         }
1713 #endif
1714
1715         /* the test is responsible for initializing and enabling */
1716         pr_info("Testing tracer %s: ", type->name);
1717         ret = type->selftest(type, tr);
1718         /* the test is responsible for resetting too */
1719         tr->current_trace = saved_tracer;
1720         if (ret) {
1721                 printk(KERN_CONT "FAILED!\n");
1722                 /* Add the warning after printing 'FAILED' */
1723                 WARN_ON(1);
1724                 return -1;
1725         }
1726         /* Only reset on passing, to avoid touching corrupted buffers */
1727         tracing_reset_online_cpus(&tr->trace_buffer);
1728
1729 #ifdef CONFIG_TRACER_MAX_TRACE
1730         if (type->use_max_tr) {
1731                 tr->allocated_snapshot = false;
1732
1733                 /* Shrink the max buffer again */
1734                 if (ring_buffer_expanded)
1735                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1736                                            RING_BUFFER_ALL_CPUS);
1737         }
1738 #endif
1739
1740         printk(KERN_CONT "PASSED\n");
1741         return 0;
1742 }
1743
1744 static __init int init_trace_selftests(void)
1745 {
1746         struct trace_selftests *p, *n;
1747         struct tracer *t, **last;
1748         int ret;
1749
1750         selftests_can_run = true;
1751
1752         mutex_lock(&trace_types_lock);
1753
1754         if (list_empty(&postponed_selftests))
1755                 goto out;
1756
1757         pr_info("Running postponed tracer tests:\n");
1758
1759         tracing_selftest_running = true;
1760         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1761                 /* This loop can take minutes when sanitizers are enabled, so
1762                  * lets make sure we allow RCU processing.
1763                  */
1764                 cond_resched();
1765                 ret = run_tracer_selftest(p->type);
1766                 /* If the test fails, then warn and remove from available_tracers */
1767                 if (ret < 0) {
1768                         WARN(1, "tracer: %s failed selftest, disabling\n",
1769                              p->type->name);
1770                         last = &trace_types;
1771                         for (t = trace_types; t; t = t->next) {
1772                                 if (t == p->type) {
1773                                         *last = t->next;
1774                                         break;
1775                                 }
1776                                 last = &t->next;
1777                         }
1778                 }
1779                 list_del(&p->list);
1780                 kfree(p);
1781         }
1782         tracing_selftest_running = false;
1783
1784  out:
1785         mutex_unlock(&trace_types_lock);
1786
1787         return 0;
1788 }
1789 core_initcall(init_trace_selftests);
1790 #else
1791 static inline int run_tracer_selftest(struct tracer *type)
1792 {
1793         return 0;
1794 }
1795 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1796
1797 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1798
1799 static void __init apply_trace_boot_options(void);
1800
1801 /**
1802  * register_tracer - register a tracer with the ftrace system.
1803  * @type: the plugin for the tracer
1804  *
1805  * Register a new plugin tracer.
1806  */
1807 int __init register_tracer(struct tracer *type)
1808 {
1809         struct tracer *t;
1810         int ret = 0;
1811
1812         if (!type->name) {
1813                 pr_info("Tracer must have a name\n");
1814                 return -1;
1815         }
1816
1817         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1818                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1819                 return -1;
1820         }
1821
1822         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1823                 pr_warning("Can not register tracer %s due to lockdown\n",
1824                            type->name);
1825                 return -EPERM;
1826         }
1827
1828         mutex_lock(&trace_types_lock);
1829
1830         tracing_selftest_running = true;
1831
1832         for (t = trace_types; t; t = t->next) {
1833                 if (strcmp(type->name, t->name) == 0) {
1834                         /* already found */
1835                         pr_info("Tracer %s already registered\n",
1836                                 type->name);
1837                         ret = -1;
1838                         goto out;
1839                 }
1840         }
1841
1842         if (!type->set_flag)
1843                 type->set_flag = &dummy_set_flag;
1844         if (!type->flags) {
1845                 /*allocate a dummy tracer_flags*/
1846                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1847                 if (!type->flags) {
1848                         ret = -ENOMEM;
1849                         goto out;
1850                 }
1851                 type->flags->val = 0;
1852                 type->flags->opts = dummy_tracer_opt;
1853         } else
1854                 if (!type->flags->opts)
1855                         type->flags->opts = dummy_tracer_opt;
1856
1857         /* store the tracer for __set_tracer_option */
1858         type->flags->trace = type;
1859
1860         ret = run_tracer_selftest(type);
1861         if (ret < 0)
1862                 goto out;
1863
1864         type->next = trace_types;
1865         trace_types = type;
1866         add_tracer_options(&global_trace, type);
1867
1868  out:
1869         tracing_selftest_running = false;
1870         mutex_unlock(&trace_types_lock);
1871
1872         if (ret || !default_bootup_tracer)
1873                 goto out_unlock;
1874
1875         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1876                 goto out_unlock;
1877
1878         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1879         /* Do we want this tracer to start on bootup? */
1880         tracing_set_tracer(&global_trace, type->name);
1881         default_bootup_tracer = NULL;
1882
1883         apply_trace_boot_options();
1884
1885         /* disable other selftests, since this will break it. */
1886         tracing_selftest_disabled = true;
1887 #ifdef CONFIG_FTRACE_STARTUP_TEST
1888         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1889                type->name);
1890 #endif
1891
1892  out_unlock:
1893         return ret;
1894 }
1895
1896 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1897 {
1898         struct ring_buffer *buffer = buf->buffer;
1899
1900         if (!buffer)
1901                 return;
1902
1903         ring_buffer_record_disable(buffer);
1904
1905         /* Make sure all commits have finished */
1906         synchronize_rcu();
1907         ring_buffer_reset_cpu(buffer, cpu);
1908
1909         ring_buffer_record_enable(buffer);
1910 }
1911
1912 void tracing_reset_online_cpus(struct trace_buffer *buf)
1913 {
1914         struct ring_buffer *buffer = buf->buffer;
1915         int cpu;
1916
1917         if (!buffer)
1918                 return;
1919
1920         ring_buffer_record_disable(buffer);
1921
1922         /* Make sure all commits have finished */
1923         synchronize_rcu();
1924
1925         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1926
1927         for_each_online_cpu(cpu)
1928                 ring_buffer_reset_cpu(buffer, cpu);
1929
1930         ring_buffer_record_enable(buffer);
1931 }
1932
1933 /* Must have trace_types_lock held */
1934 void tracing_reset_all_online_cpus_unlocked(void)
1935 {
1936         struct trace_array *tr;
1937
1938         lockdep_assert_held(&trace_types_lock);
1939
1940         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1941                 if (!tr->clear_trace)
1942                         continue;
1943                 tr->clear_trace = false;
1944                 tracing_reset_online_cpus(&tr->trace_buffer);
1945 #ifdef CONFIG_TRACER_MAX_TRACE
1946                 tracing_reset_online_cpus(&tr->max_buffer);
1947 #endif
1948         }
1949 }
1950
1951 void tracing_reset_all_online_cpus(void)
1952 {
1953         mutex_lock(&trace_types_lock);
1954         tracing_reset_all_online_cpus_unlocked();
1955         mutex_unlock(&trace_types_lock);
1956 }
1957
1958 /*
1959  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1960  * is the tgid last observed corresponding to pid=i.
1961  */
1962 static int *tgid_map;
1963
1964 /* The maximum valid index into tgid_map. */
1965 static size_t tgid_map_max;
1966
1967 #define SAVED_CMDLINES_DEFAULT 128
1968 #define NO_CMDLINE_MAP UINT_MAX
1969 /*
1970  * Preemption must be disabled before acquiring trace_cmdline_lock.
1971  * The various trace_arrays' max_lock must be acquired in a context
1972  * where interrupt is disabled.
1973  */
1974 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1975 struct saved_cmdlines_buffer {
1976         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1977         unsigned *map_cmdline_to_pid;
1978         unsigned cmdline_num;
1979         int cmdline_idx;
1980         char *saved_cmdlines;
1981 };
1982 static struct saved_cmdlines_buffer *savedcmd;
1983
1984 static inline char *get_saved_cmdlines(int idx)
1985 {
1986         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1987 }
1988
1989 static inline void set_cmdline(int idx, const char *cmdline)
1990 {
1991         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1992 }
1993
1994 static int allocate_cmdlines_buffer(unsigned int val,
1995                                     struct saved_cmdlines_buffer *s)
1996 {
1997         s->map_cmdline_to_pid = kmalloc_array(val,
1998                                               sizeof(*s->map_cmdline_to_pid),
1999                                               GFP_KERNEL);
2000         if (!s->map_cmdline_to_pid)
2001                 return -ENOMEM;
2002
2003         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2004         if (!s->saved_cmdlines) {
2005                 kfree(s->map_cmdline_to_pid);
2006                 return -ENOMEM;
2007         }
2008
2009         s->cmdline_idx = 0;
2010         s->cmdline_num = val;
2011         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2012                sizeof(s->map_pid_to_cmdline));
2013         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2014                val * sizeof(*s->map_cmdline_to_pid));
2015
2016         return 0;
2017 }
2018
2019 static int trace_create_savedcmd(void)
2020 {
2021         int ret;
2022
2023         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2024         if (!savedcmd)
2025                 return -ENOMEM;
2026
2027         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2028         if (ret < 0) {
2029                 kfree(savedcmd);
2030                 savedcmd = NULL;
2031                 return -ENOMEM;
2032         }
2033
2034         return 0;
2035 }
2036
2037 int is_tracing_stopped(void)
2038 {
2039         return global_trace.stop_count;
2040 }
2041
2042 /**
2043  * tracing_start - quick start of the tracer
2044  *
2045  * If tracing is enabled but was stopped by tracing_stop,
2046  * this will start the tracer back up.
2047  */
2048 void tracing_start(void)
2049 {
2050         struct ring_buffer *buffer;
2051         unsigned long flags;
2052
2053         if (tracing_disabled)
2054                 return;
2055
2056         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2057         if (--global_trace.stop_count) {
2058                 if (global_trace.stop_count < 0) {
2059                         /* Someone screwed up their debugging */
2060                         WARN_ON_ONCE(1);
2061                         global_trace.stop_count = 0;
2062                 }
2063                 goto out;
2064         }
2065
2066         /* Prevent the buffers from switching */
2067         arch_spin_lock(&global_trace.max_lock);
2068
2069         buffer = global_trace.trace_buffer.buffer;
2070         if (buffer)
2071                 ring_buffer_record_enable(buffer);
2072
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074         buffer = global_trace.max_buffer.buffer;
2075         if (buffer)
2076                 ring_buffer_record_enable(buffer);
2077 #endif
2078
2079         arch_spin_unlock(&global_trace.max_lock);
2080
2081  out:
2082         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
2084
2085 static void tracing_start_tr(struct trace_array *tr)
2086 {
2087         struct ring_buffer *buffer;
2088         unsigned long flags;
2089
2090         if (tracing_disabled)
2091                 return;
2092
2093         /* If global, we need to also start the max tracer */
2094         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2095                 return tracing_start();
2096
2097         raw_spin_lock_irqsave(&tr->start_lock, flags);
2098
2099         if (--tr->stop_count) {
2100                 if (tr->stop_count < 0) {
2101                         /* Someone screwed up their debugging */
2102                         WARN_ON_ONCE(1);
2103                         tr->stop_count = 0;
2104                 }
2105                 goto out;
2106         }
2107
2108         buffer = tr->trace_buffer.buffer;
2109         if (buffer)
2110                 ring_buffer_record_enable(buffer);
2111
2112  out:
2113         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2114 }
2115
2116 /**
2117  * tracing_stop - quick stop of the tracer
2118  *
2119  * Light weight way to stop tracing. Use in conjunction with
2120  * tracing_start.
2121  */
2122 void tracing_stop(void)
2123 {
2124         struct ring_buffer *buffer;
2125         unsigned long flags;
2126
2127         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2128         if (global_trace.stop_count++)
2129                 goto out;
2130
2131         /* Prevent the buffers from switching */
2132         arch_spin_lock(&global_trace.max_lock);
2133
2134         buffer = global_trace.trace_buffer.buffer;
2135         if (buffer)
2136                 ring_buffer_record_disable(buffer);
2137
2138 #ifdef CONFIG_TRACER_MAX_TRACE
2139         buffer = global_trace.max_buffer.buffer;
2140         if (buffer)
2141                 ring_buffer_record_disable(buffer);
2142 #endif
2143
2144         arch_spin_unlock(&global_trace.max_lock);
2145
2146  out:
2147         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2148 }
2149
2150 static void tracing_stop_tr(struct trace_array *tr)
2151 {
2152         struct ring_buffer *buffer;
2153         unsigned long flags;
2154
2155         /* If global, we need to also stop the max tracer */
2156         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2157                 return tracing_stop();
2158
2159         raw_spin_lock_irqsave(&tr->start_lock, flags);
2160         if (tr->stop_count++)
2161                 goto out;
2162
2163         buffer = tr->trace_buffer.buffer;
2164         if (buffer)
2165                 ring_buffer_record_disable(buffer);
2166
2167  out:
2168         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2169 }
2170
2171 static int trace_save_cmdline(struct task_struct *tsk)
2172 {
2173         unsigned tpid, idx;
2174
2175         /* treat recording of idle task as a success */
2176         if (!tsk->pid)
2177                 return 1;
2178
2179         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2180
2181         /*
2182          * It's not the end of the world if we don't get
2183          * the lock, but we also don't want to spin
2184          * nor do we want to disable interrupts,
2185          * so if we miss here, then better luck next time.
2186          *
2187          * This is called within the scheduler and wake up, so interrupts
2188          * had better been disabled and run queue lock been held.
2189          */
2190         if (!arch_spin_trylock(&trace_cmdline_lock))
2191                 return 0;
2192
2193         idx = savedcmd->map_pid_to_cmdline[tpid];
2194         if (idx == NO_CMDLINE_MAP) {
2195                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2196
2197                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2198                 savedcmd->cmdline_idx = idx;
2199         }
2200
2201         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2202         set_cmdline(idx, tsk->comm);
2203
2204         arch_spin_unlock(&trace_cmdline_lock);
2205
2206         return 1;
2207 }
2208
2209 static void __trace_find_cmdline(int pid, char comm[])
2210 {
2211         unsigned map;
2212         int tpid;
2213
2214         if (!pid) {
2215                 strcpy(comm, "<idle>");
2216                 return;
2217         }
2218
2219         if (WARN_ON_ONCE(pid < 0)) {
2220                 strcpy(comm, "<XXX>");
2221                 return;
2222         }
2223
2224         tpid = pid & (PID_MAX_DEFAULT - 1);
2225         map = savedcmd->map_pid_to_cmdline[tpid];
2226         if (map != NO_CMDLINE_MAP) {
2227                 tpid = savedcmd->map_cmdline_to_pid[map];
2228                 if (tpid == pid) {
2229                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2230                         return;
2231                 }
2232         }
2233         strcpy(comm, "<...>");
2234 }
2235
2236 void trace_find_cmdline(int pid, char comm[])
2237 {
2238         preempt_disable();
2239         arch_spin_lock(&trace_cmdline_lock);
2240
2241         __trace_find_cmdline(pid, comm);
2242
2243         arch_spin_unlock(&trace_cmdline_lock);
2244         preempt_enable();
2245 }
2246
2247 static int *trace_find_tgid_ptr(int pid)
2248 {
2249         /*
2250          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2251          * if we observe a non-NULL tgid_map then we also observe the correct
2252          * tgid_map_max.
2253          */
2254         int *map = smp_load_acquire(&tgid_map);
2255
2256         if (unlikely(!map || pid > tgid_map_max))
2257                 return NULL;
2258
2259         return &map[pid];
2260 }
2261
2262 int trace_find_tgid(int pid)
2263 {
2264         int *ptr = trace_find_tgid_ptr(pid);
2265
2266         return ptr ? *ptr : 0;
2267 }
2268
2269 static int trace_save_tgid(struct task_struct *tsk)
2270 {
2271         int *ptr;
2272
2273         /* treat recording of idle task as a success */
2274         if (!tsk->pid)
2275                 return 1;
2276
2277         ptr = trace_find_tgid_ptr(tsk->pid);
2278         if (!ptr)
2279                 return 0;
2280
2281         *ptr = tsk->tgid;
2282         return 1;
2283 }
2284
2285 static bool tracing_record_taskinfo_skip(int flags)
2286 {
2287         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2288                 return true;
2289         if (!__this_cpu_read(trace_taskinfo_save))
2290                 return true;
2291         return false;
2292 }
2293
2294 /**
2295  * tracing_record_taskinfo - record the task info of a task
2296  *
2297  * @task:  task to record
2298  * @flags: TRACE_RECORD_CMDLINE for recording comm
2299  *         TRACE_RECORD_TGID for recording tgid
2300  */
2301 void tracing_record_taskinfo(struct task_struct *task, int flags)
2302 {
2303         bool done;
2304
2305         if (tracing_record_taskinfo_skip(flags))
2306                 return;
2307
2308         /*
2309          * Record as much task information as possible. If some fail, continue
2310          * to try to record the others.
2311          */
2312         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2313         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2314
2315         /* If recording any information failed, retry again soon. */
2316         if (!done)
2317                 return;
2318
2319         __this_cpu_write(trace_taskinfo_save, false);
2320 }
2321
2322 /**
2323  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2324  *
2325  * @prev: previous task during sched_switch
2326  * @next: next task during sched_switch
2327  * @flags: TRACE_RECORD_CMDLINE for recording comm
2328  *         TRACE_RECORD_TGID for recording tgid
2329  */
2330 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2331                                           struct task_struct *next, int flags)
2332 {
2333         bool done;
2334
2335         if (tracing_record_taskinfo_skip(flags))
2336                 return;
2337
2338         /*
2339          * Record as much task information as possible. If some fail, continue
2340          * to try to record the others.
2341          */
2342         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2343         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2344         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2345         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2346
2347         /* If recording any information failed, retry again soon. */
2348         if (!done)
2349                 return;
2350
2351         __this_cpu_write(trace_taskinfo_save, false);
2352 }
2353
2354 /* Helpers to record a specific task information */
2355 void tracing_record_cmdline(struct task_struct *task)
2356 {
2357         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2358 }
2359
2360 void tracing_record_tgid(struct task_struct *task)
2361 {
2362         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2363 }
2364
2365 /*
2366  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2367  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2368  * simplifies those functions and keeps them in sync.
2369  */
2370 enum print_line_t trace_handle_return(struct trace_seq *s)
2371 {
2372         return trace_seq_has_overflowed(s) ?
2373                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2374 }
2375 EXPORT_SYMBOL_GPL(trace_handle_return);
2376
2377 void
2378 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2379                              unsigned long flags, int pc)
2380 {
2381         struct task_struct *tsk = current;
2382
2383         entry->preempt_count            = pc & 0xff;
2384         entry->pid                      = (tsk) ? tsk->pid : 0;
2385         entry->type                     = type;
2386         entry->flags =
2387 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2388                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2389 #else
2390                 TRACE_FLAG_IRQS_NOSUPPORT |
2391 #endif
2392                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2393                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2394                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2395                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2396                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2397 }
2398 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2399
2400 struct ring_buffer_event *
2401 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2402                           int type,
2403                           unsigned long len,
2404                           unsigned long flags, int pc)
2405 {
2406         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2407 }
2408
2409 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2410 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2411 static int trace_buffered_event_ref;
2412
2413 /**
2414  * trace_buffered_event_enable - enable buffering events
2415  *
2416  * When events are being filtered, it is quicker to use a temporary
2417  * buffer to write the event data into if there's a likely chance
2418  * that it will not be committed. The discard of the ring buffer
2419  * is not as fast as committing, and is much slower than copying
2420  * a commit.
2421  *
2422  * When an event is to be filtered, allocate per cpu buffers to
2423  * write the event data into, and if the event is filtered and discarded
2424  * it is simply dropped, otherwise, the entire data is to be committed
2425  * in one shot.
2426  */
2427 void trace_buffered_event_enable(void)
2428 {
2429         struct ring_buffer_event *event;
2430         struct page *page;
2431         int cpu;
2432
2433         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2434
2435         if (trace_buffered_event_ref++)
2436                 return;
2437
2438         for_each_tracing_cpu(cpu) {
2439                 page = alloc_pages_node(cpu_to_node(cpu),
2440                                         GFP_KERNEL | __GFP_NORETRY, 0);
2441                 if (!page)
2442                         goto failed;
2443
2444                 event = page_address(page);
2445                 memset(event, 0, sizeof(*event));
2446
2447                 per_cpu(trace_buffered_event, cpu) = event;
2448
2449                 preempt_disable();
2450                 if (cpu == smp_processor_id() &&
2451                     this_cpu_read(trace_buffered_event) !=
2452                     per_cpu(trace_buffered_event, cpu))
2453                         WARN_ON_ONCE(1);
2454                 preempt_enable();
2455         }
2456
2457         return;
2458  failed:
2459         trace_buffered_event_disable();
2460 }
2461
2462 static void enable_trace_buffered_event(void *data)
2463 {
2464         /* Probably not needed, but do it anyway */
2465         smp_rmb();
2466         this_cpu_dec(trace_buffered_event_cnt);
2467 }
2468
2469 static void disable_trace_buffered_event(void *data)
2470 {
2471         this_cpu_inc(trace_buffered_event_cnt);
2472 }
2473
2474 /**
2475  * trace_buffered_event_disable - disable buffering events
2476  *
2477  * When a filter is removed, it is faster to not use the buffered
2478  * events, and to commit directly into the ring buffer. Free up
2479  * the temp buffers when there are no more users. This requires
2480  * special synchronization with current events.
2481  */
2482 void trace_buffered_event_disable(void)
2483 {
2484         int cpu;
2485
2486         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2487
2488         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2489                 return;
2490
2491         if (--trace_buffered_event_ref)
2492                 return;
2493
2494         preempt_disable();
2495         /* For each CPU, set the buffer as used. */
2496         smp_call_function_many(tracing_buffer_mask,
2497                                disable_trace_buffered_event, NULL, 1);
2498         preempt_enable();
2499
2500         /* Wait for all current users to finish */
2501         synchronize_rcu();
2502
2503         for_each_tracing_cpu(cpu) {
2504                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2505                 per_cpu(trace_buffered_event, cpu) = NULL;
2506         }
2507         /*
2508          * Make sure trace_buffered_event is NULL before clearing
2509          * trace_buffered_event_cnt.
2510          */
2511         smp_wmb();
2512
2513         preempt_disable();
2514         /* Do the work on each cpu */
2515         smp_call_function_many(tracing_buffer_mask,
2516                                enable_trace_buffered_event, NULL, 1);
2517         preempt_enable();
2518 }
2519
2520 static struct ring_buffer *temp_buffer;
2521
2522 struct ring_buffer_event *
2523 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2524                           struct trace_event_file *trace_file,
2525                           int type, unsigned long len,
2526                           unsigned long flags, int pc)
2527 {
2528         struct ring_buffer_event *entry;
2529         int val;
2530
2531         *current_rb = trace_file->tr->trace_buffer.buffer;
2532
2533         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2534              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2535             (entry = this_cpu_read(trace_buffered_event))) {
2536                 /* Try to use the per cpu buffer first */
2537                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2538                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2539                         trace_event_setup(entry, type, flags, pc);
2540                         entry->array[0] = len;
2541                         return entry;
2542                 }
2543                 this_cpu_dec(trace_buffered_event_cnt);
2544         }
2545
2546         entry = __trace_buffer_lock_reserve(*current_rb,
2547                                             type, len, flags, pc);
2548         /*
2549          * If tracing is off, but we have triggers enabled
2550          * we still need to look at the event data. Use the temp_buffer
2551          * to store the trace event for the trigger to use. It's recursive
2552          * safe and will not be recorded anywhere.
2553          */
2554         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2555                 *current_rb = temp_buffer;
2556                 entry = __trace_buffer_lock_reserve(*current_rb,
2557                                                     type, len, flags, pc);
2558         }
2559         return entry;
2560 }
2561 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2562
2563 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2564 static DEFINE_MUTEX(tracepoint_printk_mutex);
2565
2566 static void output_printk(struct trace_event_buffer *fbuffer)
2567 {
2568         struct trace_event_call *event_call;
2569         struct trace_event *event;
2570         unsigned long flags;
2571         struct trace_iterator *iter = tracepoint_print_iter;
2572
2573         /* We should never get here if iter is NULL */
2574         if (WARN_ON_ONCE(!iter))
2575                 return;
2576
2577         event_call = fbuffer->trace_file->event_call;
2578         if (!event_call || !event_call->event.funcs ||
2579             !event_call->event.funcs->trace)
2580                 return;
2581
2582         event = &fbuffer->trace_file->event_call->event;
2583
2584         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2585         trace_seq_init(&iter->seq);
2586         iter->ent = fbuffer->entry;
2587         event_call->event.funcs->trace(iter, 0, event);
2588         trace_seq_putc(&iter->seq, 0);
2589         printk("%s", iter->seq.buffer);
2590
2591         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2592 }
2593
2594 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2595                              void __user *buffer, size_t *lenp,
2596                              loff_t *ppos)
2597 {
2598         int save_tracepoint_printk;
2599         int ret;
2600
2601         mutex_lock(&tracepoint_printk_mutex);
2602         save_tracepoint_printk = tracepoint_printk;
2603
2604         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2605
2606         /*
2607          * This will force exiting early, as tracepoint_printk
2608          * is always zero when tracepoint_printk_iter is not allocated
2609          */
2610         if (!tracepoint_print_iter)
2611                 tracepoint_printk = 0;
2612
2613         if (save_tracepoint_printk == tracepoint_printk)
2614                 goto out;
2615
2616         if (tracepoint_printk)
2617                 static_key_enable(&tracepoint_printk_key.key);
2618         else
2619                 static_key_disable(&tracepoint_printk_key.key);
2620
2621  out:
2622         mutex_unlock(&tracepoint_printk_mutex);
2623
2624         return ret;
2625 }
2626
2627 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2628 {
2629         if (static_key_false(&tracepoint_printk_key.key))
2630                 output_printk(fbuffer);
2631
2632         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2633                                     fbuffer->event, fbuffer->entry,
2634                                     fbuffer->flags, fbuffer->pc);
2635 }
2636 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2637
2638 /*
2639  * Skip 3:
2640  *
2641  *   trace_buffer_unlock_commit_regs()
2642  *   trace_event_buffer_commit()
2643  *   trace_event_raw_event_xxx()
2644  */
2645 # define STACK_SKIP 3
2646
2647 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2648                                      struct ring_buffer *buffer,
2649                                      struct ring_buffer_event *event,
2650                                      unsigned long flags, int pc,
2651                                      struct pt_regs *regs)
2652 {
2653         __buffer_unlock_commit(buffer, event);
2654
2655         /*
2656          * If regs is not set, then skip the necessary functions.
2657          * Note, we can still get here via blktrace, wakeup tracer
2658          * and mmiotrace, but that's ok if they lose a function or
2659          * two. They are not that meaningful.
2660          */
2661         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2662         ftrace_trace_userstack(tr, buffer, flags, pc);
2663 }
2664
2665 /*
2666  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2667  */
2668 void
2669 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2670                                    struct ring_buffer_event *event)
2671 {
2672         __buffer_unlock_commit(buffer, event);
2673 }
2674
2675 static void
2676 trace_process_export(struct trace_export *export,
2677                struct ring_buffer_event *event)
2678 {
2679         struct trace_entry *entry;
2680         unsigned int size = 0;
2681
2682         entry = ring_buffer_event_data(event);
2683         size = ring_buffer_event_length(event);
2684         export->write(export, entry, size);
2685 }
2686
2687 static DEFINE_MUTEX(ftrace_export_lock);
2688
2689 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2690
2691 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2692
2693 static inline void ftrace_exports_enable(void)
2694 {
2695         static_branch_enable(&ftrace_exports_enabled);
2696 }
2697
2698 static inline void ftrace_exports_disable(void)
2699 {
2700         static_branch_disable(&ftrace_exports_enabled);
2701 }
2702
2703 static void ftrace_exports(struct ring_buffer_event *event)
2704 {
2705         struct trace_export *export;
2706
2707         preempt_disable_notrace();
2708
2709         export = rcu_dereference_raw_check(ftrace_exports_list);
2710         while (export) {
2711                 trace_process_export(export, event);
2712                 export = rcu_dereference_raw_check(export->next);
2713         }
2714
2715         preempt_enable_notrace();
2716 }
2717
2718 static inline void
2719 add_trace_export(struct trace_export **list, struct trace_export *export)
2720 {
2721         rcu_assign_pointer(export->next, *list);
2722         /*
2723          * We are entering export into the list but another
2724          * CPU might be walking that list. We need to make sure
2725          * the export->next pointer is valid before another CPU sees
2726          * the export pointer included into the list.
2727          */
2728         rcu_assign_pointer(*list, export);
2729 }
2730
2731 static inline int
2732 rm_trace_export(struct trace_export **list, struct trace_export *export)
2733 {
2734         struct trace_export **p;
2735
2736         for (p = list; *p != NULL; p = &(*p)->next)
2737                 if (*p == export)
2738                         break;
2739
2740         if (*p != export)
2741                 return -1;
2742
2743         rcu_assign_pointer(*p, (*p)->next);
2744
2745         return 0;
2746 }
2747
2748 static inline void
2749 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2750 {
2751         if (*list == NULL)
2752                 ftrace_exports_enable();
2753
2754         add_trace_export(list, export);
2755 }
2756
2757 static inline int
2758 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2759 {
2760         int ret;
2761
2762         ret = rm_trace_export(list, export);
2763         if (*list == NULL)
2764                 ftrace_exports_disable();
2765
2766         return ret;
2767 }
2768
2769 int register_ftrace_export(struct trace_export *export)
2770 {
2771         if (WARN_ON_ONCE(!export->write))
2772                 return -1;
2773
2774         mutex_lock(&ftrace_export_lock);
2775
2776         add_ftrace_export(&ftrace_exports_list, export);
2777
2778         mutex_unlock(&ftrace_export_lock);
2779
2780         return 0;
2781 }
2782 EXPORT_SYMBOL_GPL(register_ftrace_export);
2783
2784 int unregister_ftrace_export(struct trace_export *export)
2785 {
2786         int ret;
2787
2788         mutex_lock(&ftrace_export_lock);
2789
2790         ret = rm_ftrace_export(&ftrace_exports_list, export);
2791
2792         mutex_unlock(&ftrace_export_lock);
2793
2794         return ret;
2795 }
2796 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2797
2798 void
2799 trace_function(struct trace_array *tr,
2800                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2801                int pc)
2802 {
2803         struct trace_event_call *call = &event_function;
2804         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2805         struct ring_buffer_event *event;
2806         struct ftrace_entry *entry;
2807
2808         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2809                                             flags, pc);
2810         if (!event)
2811                 return;
2812         entry   = ring_buffer_event_data(event);
2813         entry->ip                       = ip;
2814         entry->parent_ip                = parent_ip;
2815
2816         if (!call_filter_check_discard(call, entry, buffer, event)) {
2817                 if (static_branch_unlikely(&ftrace_exports_enabled))
2818                         ftrace_exports(event);
2819                 __buffer_unlock_commit(buffer, event);
2820         }
2821 }
2822
2823 #ifdef CONFIG_STACKTRACE
2824
2825 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2826 #define FTRACE_KSTACK_NESTING   4
2827
2828 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2829
2830 struct ftrace_stack {
2831         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2832 };
2833
2834
2835 struct ftrace_stacks {
2836         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2837 };
2838
2839 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2840 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2841
2842 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2843                                  unsigned long flags,
2844                                  int skip, int pc, struct pt_regs *regs)
2845 {
2846         struct trace_event_call *call = &event_kernel_stack;
2847         struct ring_buffer_event *event;
2848         unsigned int size, nr_entries;
2849         struct ftrace_stack *fstack;
2850         struct stack_entry *entry;
2851         int stackidx;
2852
2853         /*
2854          * Add one, for this function and the call to save_stack_trace()
2855          * If regs is set, then these functions will not be in the way.
2856          */
2857 #ifndef CONFIG_UNWINDER_ORC
2858         if (!regs)
2859                 skip++;
2860 #endif
2861
2862         /*
2863          * Since events can happen in NMIs there's no safe way to
2864          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2865          * or NMI comes in, it will just have to use the default
2866          * FTRACE_STACK_SIZE.
2867          */
2868         preempt_disable_notrace();
2869
2870         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2871
2872         /* This should never happen. If it does, yell once and skip */
2873         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2874                 goto out;
2875
2876         /*
2877          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2878          * interrupt will either see the value pre increment or post
2879          * increment. If the interrupt happens pre increment it will have
2880          * restored the counter when it returns.  We just need a barrier to
2881          * keep gcc from moving things around.
2882          */
2883         barrier();
2884
2885         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2886         size = ARRAY_SIZE(fstack->calls);
2887
2888         if (regs) {
2889                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2890                                                    size, skip);
2891         } else {
2892                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2893         }
2894
2895         size = nr_entries * sizeof(unsigned long);
2896         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2897                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2898                                     flags, pc);
2899         if (!event)
2900                 goto out;
2901         entry = ring_buffer_event_data(event);
2902
2903         memcpy(&entry->caller, fstack->calls, size);
2904         entry->size = nr_entries;
2905
2906         if (!call_filter_check_discard(call, entry, buffer, event))
2907                 __buffer_unlock_commit(buffer, event);
2908
2909  out:
2910         /* Again, don't let gcc optimize things here */
2911         barrier();
2912         __this_cpu_dec(ftrace_stack_reserve);
2913         preempt_enable_notrace();
2914
2915 }
2916
2917 static inline void ftrace_trace_stack(struct trace_array *tr,
2918                                       struct ring_buffer *buffer,
2919                                       unsigned long flags,
2920                                       int skip, int pc, struct pt_regs *regs)
2921 {
2922         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2923                 return;
2924
2925         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2926 }
2927
2928 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2929                    int pc)
2930 {
2931         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2932
2933         if (rcu_is_watching()) {
2934                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2935                 return;
2936         }
2937
2938         /*
2939          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2940          * but if the above rcu_is_watching() failed, then the NMI
2941          * triggered someplace critical, and rcu_irq_enter() should
2942          * not be called from NMI.
2943          */
2944         if (unlikely(in_nmi()))
2945                 return;
2946
2947         rcu_irq_enter_irqson();
2948         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2949         rcu_irq_exit_irqson();
2950 }
2951
2952 /**
2953  * trace_dump_stack - record a stack back trace in the trace buffer
2954  * @skip: Number of functions to skip (helper handlers)
2955  */
2956 void trace_dump_stack(int skip)
2957 {
2958         unsigned long flags;
2959
2960         if (tracing_disabled || tracing_selftest_running)
2961                 return;
2962
2963         local_save_flags(flags);
2964
2965 #ifndef CONFIG_UNWINDER_ORC
2966         /* Skip 1 to skip this function. */
2967         skip++;
2968 #endif
2969         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2970                              flags, skip, preempt_count(), NULL);
2971 }
2972 EXPORT_SYMBOL_GPL(trace_dump_stack);
2973
2974 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2975 static DEFINE_PER_CPU(int, user_stack_count);
2976
2977 static void
2978 ftrace_trace_userstack(struct trace_array *tr,
2979                        struct ring_buffer *buffer, unsigned long flags, int pc)
2980 {
2981         struct trace_event_call *call = &event_user_stack;
2982         struct ring_buffer_event *event;
2983         struct userstack_entry *entry;
2984
2985         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2986                 return;
2987
2988         /*
2989          * NMIs can not handle page faults, even with fix ups.
2990          * The save user stack can (and often does) fault.
2991          */
2992         if (unlikely(in_nmi()))
2993                 return;
2994
2995         /*
2996          * prevent recursion, since the user stack tracing may
2997          * trigger other kernel events.
2998          */
2999         preempt_disable();
3000         if (__this_cpu_read(user_stack_count))
3001                 goto out;
3002
3003         __this_cpu_inc(user_stack_count);
3004
3005         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3006                                             sizeof(*entry), flags, pc);
3007         if (!event)
3008                 goto out_drop_count;
3009         entry   = ring_buffer_event_data(event);
3010
3011         entry->tgid             = current->tgid;
3012         memset(&entry->caller, 0, sizeof(entry->caller));
3013
3014         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3015         if (!call_filter_check_discard(call, entry, buffer, event))
3016                 __buffer_unlock_commit(buffer, event);
3017
3018  out_drop_count:
3019         __this_cpu_dec(user_stack_count);
3020  out:
3021         preempt_enable();
3022 }
3023 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3024 static void ftrace_trace_userstack(struct trace_array *tr,
3025                                    struct ring_buffer *buffer,
3026                                    unsigned long flags, int pc)
3027 {
3028 }
3029 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3030
3031 #endif /* CONFIG_STACKTRACE */
3032
3033 /* created for use with alloc_percpu */
3034 struct trace_buffer_struct {
3035         int nesting;
3036         char buffer[4][TRACE_BUF_SIZE];
3037 };
3038
3039 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3040
3041 /*
3042  * Thise allows for lockless recording.  If we're nested too deeply, then
3043  * this returns NULL.
3044  */
3045 static char *get_trace_buf(void)
3046 {
3047         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3048
3049         if (!trace_percpu_buffer || buffer->nesting >= 4)
3050                 return NULL;
3051
3052         buffer->nesting++;
3053
3054         /* Interrupts must see nesting incremented before we use the buffer */
3055         barrier();
3056         return &buffer->buffer[buffer->nesting - 1][0];
3057 }
3058
3059 static void put_trace_buf(void)
3060 {
3061         /* Don't let the decrement of nesting leak before this */
3062         barrier();
3063         this_cpu_dec(trace_percpu_buffer->nesting);
3064 }
3065
3066 static int alloc_percpu_trace_buffer(void)
3067 {
3068         struct trace_buffer_struct __percpu *buffers;
3069
3070         buffers = alloc_percpu(struct trace_buffer_struct);
3071         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3072                 return -ENOMEM;
3073
3074         trace_percpu_buffer = buffers;
3075         return 0;
3076 }
3077
3078 static int buffers_allocated;
3079
3080 void trace_printk_init_buffers(void)
3081 {
3082         if (buffers_allocated)
3083                 return;
3084
3085         if (alloc_percpu_trace_buffer())
3086                 return;
3087
3088         /* trace_printk() is for debug use only. Don't use it in production. */
3089
3090         pr_warn("\n");
3091         pr_warn("**********************************************************\n");
3092         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3093         pr_warn("**                                                      **\n");
3094         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3095         pr_warn("**                                                      **\n");
3096         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3097         pr_warn("** unsafe for production use.                           **\n");
3098         pr_warn("**                                                      **\n");
3099         pr_warn("** If you see this message and you are not debugging    **\n");
3100         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3101         pr_warn("**                                                      **\n");
3102         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3103         pr_warn("**********************************************************\n");
3104
3105         /* Expand the buffers to set size */
3106         tracing_update_buffers();
3107
3108         buffers_allocated = 1;
3109
3110         /*
3111          * trace_printk_init_buffers() can be called by modules.
3112          * If that happens, then we need to start cmdline recording
3113          * directly here. If the global_trace.buffer is already
3114          * allocated here, then this was called by module code.
3115          */
3116         if (global_trace.trace_buffer.buffer)
3117                 tracing_start_cmdline_record();
3118 }
3119 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3120
3121 void trace_printk_start_comm(void)
3122 {
3123         /* Start tracing comms if trace printk is set */
3124         if (!buffers_allocated)
3125                 return;
3126         tracing_start_cmdline_record();
3127 }
3128
3129 static void trace_printk_start_stop_comm(int enabled)
3130 {
3131         if (!buffers_allocated)
3132                 return;
3133
3134         if (enabled)
3135                 tracing_start_cmdline_record();
3136         else
3137                 tracing_stop_cmdline_record();
3138 }
3139
3140 /**
3141  * trace_vbprintk - write binary msg to tracing buffer
3142  * @ip:    The address of the caller
3143  * @fmt:   The string format to write to the buffer
3144  * @args:  Arguments for @fmt
3145  */
3146 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3147 {
3148         struct trace_event_call *call = &event_bprint;
3149         struct ring_buffer_event *event;
3150         struct ring_buffer *buffer;
3151         struct trace_array *tr = &global_trace;
3152         struct bprint_entry *entry;
3153         unsigned long flags;
3154         char *tbuffer;
3155         int len = 0, size, pc;
3156
3157         if (unlikely(tracing_selftest_running || tracing_disabled))
3158                 return 0;
3159
3160         /* Don't pollute graph traces with trace_vprintk internals */
3161         pause_graph_tracing();
3162
3163         pc = preempt_count();
3164         preempt_disable_notrace();
3165
3166         tbuffer = get_trace_buf();
3167         if (!tbuffer) {
3168                 len = 0;
3169                 goto out_nobuffer;
3170         }
3171
3172         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3173
3174         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3175                 goto out;
3176
3177         local_save_flags(flags);
3178         size = sizeof(*entry) + sizeof(u32) * len;
3179         buffer = tr->trace_buffer.buffer;
3180         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3181                                             flags, pc);
3182         if (!event)
3183                 goto out;
3184         entry = ring_buffer_event_data(event);
3185         entry->ip                       = ip;
3186         entry->fmt                      = fmt;
3187
3188         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3189         if (!call_filter_check_discard(call, entry, buffer, event)) {
3190                 __buffer_unlock_commit(buffer, event);
3191                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3192         }
3193
3194 out:
3195         put_trace_buf();
3196
3197 out_nobuffer:
3198         preempt_enable_notrace();
3199         unpause_graph_tracing();
3200
3201         return len;
3202 }
3203 EXPORT_SYMBOL_GPL(trace_vbprintk);
3204
3205 __printf(3, 0)
3206 static int
3207 __trace_array_vprintk(struct ring_buffer *buffer,
3208                       unsigned long ip, const char *fmt, va_list args)
3209 {
3210         struct trace_event_call *call = &event_print;
3211         struct ring_buffer_event *event;
3212         int len = 0, size, pc;
3213         struct print_entry *entry;
3214         unsigned long flags;
3215         char *tbuffer;
3216
3217         if (tracing_disabled || tracing_selftest_running)
3218                 return 0;
3219
3220         /* Don't pollute graph traces with trace_vprintk internals */
3221         pause_graph_tracing();
3222
3223         pc = preempt_count();
3224         preempt_disable_notrace();
3225
3226
3227         tbuffer = get_trace_buf();
3228         if (!tbuffer) {
3229                 len = 0;
3230                 goto out_nobuffer;
3231         }
3232
3233         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3234
3235         local_save_flags(flags);
3236         size = sizeof(*entry) + len + 1;
3237         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3238                                             flags, pc);
3239         if (!event)
3240                 goto out;
3241         entry = ring_buffer_event_data(event);
3242         entry->ip = ip;
3243
3244         memcpy(&entry->buf, tbuffer, len + 1);
3245         if (!call_filter_check_discard(call, entry, buffer, event)) {
3246                 __buffer_unlock_commit(buffer, event);
3247                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3248         }
3249
3250 out:
3251         put_trace_buf();
3252
3253 out_nobuffer:
3254         preempt_enable_notrace();
3255         unpause_graph_tracing();
3256
3257         return len;
3258 }
3259
3260 __printf(3, 0)
3261 int trace_array_vprintk(struct trace_array *tr,
3262                         unsigned long ip, const char *fmt, va_list args)
3263 {
3264         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3265 }
3266
3267 __printf(3, 0)
3268 int trace_array_printk(struct trace_array *tr,
3269                        unsigned long ip, const char *fmt, ...)
3270 {
3271         int ret;
3272         va_list ap;
3273
3274         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3275                 return 0;
3276
3277         if (!tr)
3278                 return -ENOENT;
3279
3280         va_start(ap, fmt);
3281         ret = trace_array_vprintk(tr, ip, fmt, ap);
3282         va_end(ap);
3283         return ret;
3284 }
3285 EXPORT_SYMBOL_GPL(trace_array_printk);
3286
3287 __printf(3, 4)
3288 int trace_array_printk_buf(struct ring_buffer *buffer,
3289                            unsigned long ip, const char *fmt, ...)
3290 {
3291         int ret;
3292         va_list ap;
3293
3294         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3295                 return 0;
3296
3297         va_start(ap, fmt);
3298         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3299         va_end(ap);
3300         return ret;
3301 }
3302
3303 __printf(2, 0)
3304 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3305 {
3306         return trace_array_vprintk(&global_trace, ip, fmt, args);
3307 }
3308 EXPORT_SYMBOL_GPL(trace_vprintk);
3309
3310 static void trace_iterator_increment(struct trace_iterator *iter)
3311 {
3312         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3313
3314         iter->idx++;
3315         if (buf_iter)
3316                 ring_buffer_read(buf_iter, NULL);
3317 }
3318
3319 static struct trace_entry *
3320 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3321                 unsigned long *lost_events)
3322 {
3323         struct ring_buffer_event *event;
3324         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3325
3326         if (buf_iter)
3327                 event = ring_buffer_iter_peek(buf_iter, ts);
3328         else
3329                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3330                                          lost_events);
3331
3332         if (event) {
3333                 iter->ent_size = ring_buffer_event_length(event);
3334                 return ring_buffer_event_data(event);
3335         }
3336         iter->ent_size = 0;
3337         return NULL;
3338 }
3339
3340 static struct trace_entry *
3341 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3342                   unsigned long *missing_events, u64 *ent_ts)
3343 {
3344         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3345         struct trace_entry *ent, *next = NULL;
3346         unsigned long lost_events = 0, next_lost = 0;
3347         int cpu_file = iter->cpu_file;
3348         u64 next_ts = 0, ts;
3349         int next_cpu = -1;
3350         int next_size = 0;
3351         int cpu;
3352
3353         /*
3354          * If we are in a per_cpu trace file, don't bother by iterating over
3355          * all cpu and peek directly.
3356          */
3357         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3358                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3359                         return NULL;
3360                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3361                 if (ent_cpu)
3362                         *ent_cpu = cpu_file;
3363
3364                 return ent;
3365         }
3366
3367         for_each_tracing_cpu(cpu) {
3368
3369                 if (ring_buffer_empty_cpu(buffer, cpu))
3370                         continue;
3371
3372                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3373
3374                 /*
3375                  * Pick the entry with the smallest timestamp:
3376                  */
3377                 if (ent && (!next || ts < next_ts)) {
3378                         next = ent;
3379                         next_cpu = cpu;
3380                         next_ts = ts;
3381                         next_lost = lost_events;
3382                         next_size = iter->ent_size;
3383                 }
3384         }
3385
3386         iter->ent_size = next_size;
3387
3388         if (ent_cpu)
3389                 *ent_cpu = next_cpu;
3390
3391         if (ent_ts)
3392                 *ent_ts = next_ts;
3393
3394         if (missing_events)
3395                 *missing_events = next_lost;
3396
3397         return next;
3398 }
3399
3400 /* Find the next real entry, without updating the iterator itself */
3401 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3402                                           int *ent_cpu, u64 *ent_ts)
3403 {
3404         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3405 }
3406
3407 /* Find the next real entry, and increment the iterator to the next entry */
3408 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3409 {
3410         iter->ent = __find_next_entry(iter, &iter->cpu,
3411                                       &iter->lost_events, &iter->ts);
3412
3413         if (iter->ent)
3414                 trace_iterator_increment(iter);
3415
3416         return iter->ent ? iter : NULL;
3417 }
3418
3419 static void trace_consume(struct trace_iterator *iter)
3420 {
3421         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3422                             &iter->lost_events);
3423 }
3424
3425 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3426 {
3427         struct trace_iterator *iter = m->private;
3428         int i = (int)*pos;
3429         void *ent;
3430
3431         WARN_ON_ONCE(iter->leftover);
3432
3433         (*pos)++;
3434
3435         /* can't go backwards */
3436         if (iter->idx > i)
3437                 return NULL;
3438
3439         if (iter->idx < 0)
3440                 ent = trace_find_next_entry_inc(iter);
3441         else
3442                 ent = iter;
3443
3444         while (ent && iter->idx < i)
3445                 ent = trace_find_next_entry_inc(iter);
3446
3447         iter->pos = *pos;
3448
3449         return ent;
3450 }
3451
3452 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3453 {
3454         struct ring_buffer_event *event;
3455         struct ring_buffer_iter *buf_iter;
3456         unsigned long entries = 0;
3457         u64 ts;
3458
3459         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3460
3461         buf_iter = trace_buffer_iter(iter, cpu);
3462         if (!buf_iter)
3463                 return;
3464
3465         ring_buffer_iter_reset(buf_iter);
3466
3467         /*
3468          * We could have the case with the max latency tracers
3469          * that a reset never took place on a cpu. This is evident
3470          * by the timestamp being before the start of the buffer.
3471          */
3472         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3473                 if (ts >= iter->trace_buffer->time_start)
3474                         break;
3475                 entries++;
3476                 ring_buffer_read(buf_iter, NULL);
3477         }
3478
3479         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3480 }
3481
3482 /*
3483  * The current tracer is copied to avoid a global locking
3484  * all around.
3485  */
3486 static void *s_start(struct seq_file *m, loff_t *pos)
3487 {
3488         struct trace_iterator *iter = m->private;
3489         struct trace_array *tr = iter->tr;
3490         int cpu_file = iter->cpu_file;
3491         void *p = NULL;
3492         loff_t l = 0;
3493         int cpu;
3494
3495         /*
3496          * copy the tracer to avoid using a global lock all around.
3497          * iter->trace is a copy of current_trace, the pointer to the
3498          * name may be used instead of a strcmp(), as iter->trace->name
3499          * will point to the same string as current_trace->name.
3500          */
3501         mutex_lock(&trace_types_lock);
3502         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3503                 /* Close iter->trace before switching to the new current tracer */
3504                 if (iter->trace->close)
3505                         iter->trace->close(iter);
3506                 *iter->trace = *tr->current_trace;
3507                 /* Reopen the new current tracer */
3508                 if (iter->trace->open)
3509                         iter->trace->open(iter);
3510         }
3511         mutex_unlock(&trace_types_lock);
3512
3513 #ifdef CONFIG_TRACER_MAX_TRACE
3514         if (iter->snapshot && iter->trace->use_max_tr)
3515                 return ERR_PTR(-EBUSY);
3516 #endif
3517
3518         if (*pos != iter->pos) {
3519                 iter->ent = NULL;
3520                 iter->cpu = 0;
3521                 iter->idx = -1;
3522
3523                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3524                         for_each_tracing_cpu(cpu)
3525                                 tracing_iter_reset(iter, cpu);
3526                 } else
3527                         tracing_iter_reset(iter, cpu_file);
3528
3529                 iter->leftover = 0;
3530                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3531                         ;
3532
3533         } else {
3534                 /*
3535                  * If we overflowed the seq_file before, then we want
3536                  * to just reuse the trace_seq buffer again.
3537                  */
3538                 if (iter->leftover)
3539                         p = iter;
3540                 else {
3541                         l = *pos - 1;
3542                         p = s_next(m, p, &l);
3543                 }
3544         }
3545
3546         trace_event_read_lock();
3547         trace_access_lock(cpu_file);
3548         return p;
3549 }
3550
3551 static void s_stop(struct seq_file *m, void *p)
3552 {
3553         struct trace_iterator *iter = m->private;
3554
3555 #ifdef CONFIG_TRACER_MAX_TRACE
3556         if (iter->snapshot && iter->trace->use_max_tr)
3557                 return;
3558 #endif
3559
3560         trace_access_unlock(iter->cpu_file);
3561         trace_event_read_unlock();
3562 }
3563
3564 static void
3565 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3566                       unsigned long *entries, int cpu)
3567 {
3568         unsigned long count;
3569
3570         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3571         /*
3572          * If this buffer has skipped entries, then we hold all
3573          * entries for the trace and we need to ignore the
3574          * ones before the time stamp.
3575          */
3576         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3577                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3578                 /* total is the same as the entries */
3579                 *total = count;
3580         } else
3581                 *total = count +
3582                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3583         *entries = count;
3584 }
3585
3586 static void
3587 get_total_entries(struct trace_buffer *buf,
3588                   unsigned long *total, unsigned long *entries)
3589 {
3590         unsigned long t, e;
3591         int cpu;
3592
3593         *total = 0;
3594         *entries = 0;
3595
3596         for_each_tracing_cpu(cpu) {
3597                 get_total_entries_cpu(buf, &t, &e, cpu);
3598                 *total += t;
3599                 *entries += e;
3600         }
3601 }
3602
3603 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3604 {
3605         unsigned long total, entries;
3606
3607         if (!tr)
3608                 tr = &global_trace;
3609
3610         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3611
3612         return entries;
3613 }
3614
3615 unsigned long trace_total_entries(struct trace_array *tr)
3616 {
3617         unsigned long total, entries;
3618
3619         if (!tr)
3620                 tr = &global_trace;
3621
3622         get_total_entries(&tr->trace_buffer, &total, &entries);
3623
3624         return entries;
3625 }
3626
3627 static void print_lat_help_header(struct seq_file *m)
3628 {
3629         seq_puts(m, "#                    _------=> CPU#            \n"
3630                     "#                   / _-----=> irqs-off        \n"
3631                     "#                  | / _----=> need-resched    \n"
3632                     "#                  || / _---=> hardirq/softirq \n"
3633                     "#                  ||| / _--=> preempt-depth   \n"
3634                     "#                  |||| /     delay            \n"
3635                     "#  cmd     pid     ||||| time  |   caller      \n"
3636                     "#     \\   /        |||||  \\    |   /         \n");
3637 }
3638
3639 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3640 {
3641         unsigned long total;
3642         unsigned long entries;
3643
3644         get_total_entries(buf, &total, &entries);
3645         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3646                    entries, total, num_online_cpus());
3647         seq_puts(m, "#\n");
3648 }
3649
3650 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3651                                    unsigned int flags)
3652 {
3653         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3654
3655         print_event_info(buf, m);
3656
3657         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3658         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3659 }
3660
3661 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3662                                        unsigned int flags)
3663 {
3664         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3665         const char *space = "            ";
3666         int prec = tgid ? 12 : 2;
3667
3668         print_event_info(buf, m);
3669
3670         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3671         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3672         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3673         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3674         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3675         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3676         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3677 }
3678
3679 void
3680 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3681 {
3682         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3683         struct trace_buffer *buf = iter->trace_buffer;
3684         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3685         struct tracer *type = iter->trace;
3686         unsigned long entries;
3687         unsigned long total;
3688         const char *name = "preemption";
3689
3690         name = type->name;
3691
3692         get_total_entries(buf, &total, &entries);
3693
3694         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3695                    name, UTS_RELEASE);
3696         seq_puts(m, "# -----------------------------------"
3697                  "---------------------------------\n");
3698         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3699                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3700                    nsecs_to_usecs(data->saved_latency),
3701                    entries,
3702                    total,
3703                    buf->cpu,
3704 #if defined(CONFIG_PREEMPT_NONE)
3705                    "server",
3706 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3707                    "desktop",
3708 #elif defined(CONFIG_PREEMPT)
3709                    "preempt",
3710 #else
3711                    "unknown",
3712 #endif
3713                    /* These are reserved for later use */
3714                    0, 0, 0, 0);
3715 #ifdef CONFIG_SMP
3716         seq_printf(m, " #P:%d)\n", num_online_cpus());
3717 #else
3718         seq_puts(m, ")\n");
3719 #endif
3720         seq_puts(m, "#    -----------------\n");
3721         seq_printf(m, "#    | task: %.16s-%d "
3722                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3723                    data->comm, data->pid,
3724                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3725                    data->policy, data->rt_priority);
3726         seq_puts(m, "#    -----------------\n");
3727
3728         if (data->critical_start) {
3729                 seq_puts(m, "#  => started at: ");
3730                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3731                 trace_print_seq(m, &iter->seq);
3732                 seq_puts(m, "\n#  => ended at:   ");
3733                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3734                 trace_print_seq(m, &iter->seq);
3735                 seq_puts(m, "\n#\n");
3736         }
3737
3738         seq_puts(m, "#\n");
3739 }
3740
3741 static void test_cpu_buff_start(struct trace_iterator *iter)
3742 {
3743         struct trace_seq *s = &iter->seq;
3744         struct trace_array *tr = iter->tr;
3745
3746         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3747                 return;
3748
3749         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3750                 return;
3751
3752         if (cpumask_available(iter->started) &&
3753             cpumask_test_cpu(iter->cpu, iter->started))
3754                 return;
3755
3756         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3757                 return;
3758
3759         if (cpumask_available(iter->started))
3760                 cpumask_set_cpu(iter->cpu, iter->started);
3761
3762         /* Don't print started cpu buffer for the first entry of the trace */
3763         if (iter->idx > 1)
3764                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3765                                 iter->cpu);
3766 }
3767
3768 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3769 {
3770         struct trace_array *tr = iter->tr;
3771         struct trace_seq *s = &iter->seq;
3772         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3773         struct trace_entry *entry;
3774         struct trace_event *event;
3775
3776         entry = iter->ent;
3777
3778         test_cpu_buff_start(iter);
3779
3780         event = ftrace_find_event(entry->type);
3781
3782         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3783                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3784                         trace_print_lat_context(iter);
3785                 else
3786                         trace_print_context(iter);
3787         }
3788
3789         if (trace_seq_has_overflowed(s))
3790                 return TRACE_TYPE_PARTIAL_LINE;
3791
3792         if (event)
3793                 return event->funcs->trace(iter, sym_flags, event);
3794
3795         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3796
3797         return trace_handle_return(s);
3798 }
3799
3800 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3801 {
3802         struct trace_array *tr = iter->tr;
3803         struct trace_seq *s = &iter->seq;
3804         struct trace_entry *entry;
3805         struct trace_event *event;
3806
3807         entry = iter->ent;
3808
3809         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3810                 trace_seq_printf(s, "%d %d %llu ",
3811                                  entry->pid, iter->cpu, iter->ts);
3812
3813         if (trace_seq_has_overflowed(s))
3814                 return TRACE_TYPE_PARTIAL_LINE;
3815
3816         event = ftrace_find_event(entry->type);
3817         if (event)
3818                 return event->funcs->raw(iter, 0, event);
3819
3820         trace_seq_printf(s, "%d ?\n", entry->type);
3821
3822         return trace_handle_return(s);
3823 }
3824
3825 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3826 {
3827         struct trace_array *tr = iter->tr;
3828         struct trace_seq *s = &iter->seq;
3829         unsigned char newline = '\n';
3830         struct trace_entry *entry;
3831         struct trace_event *event;
3832
3833         entry = iter->ent;
3834
3835         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3836                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3837                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3838                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3839                 if (trace_seq_has_overflowed(s))
3840                         return TRACE_TYPE_PARTIAL_LINE;
3841         }
3842
3843         event = ftrace_find_event(entry->type);
3844         if (event) {
3845                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3846                 if (ret != TRACE_TYPE_HANDLED)
3847                         return ret;
3848         }
3849
3850         SEQ_PUT_FIELD(s, newline);
3851
3852         return trace_handle_return(s);
3853 }
3854
3855 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3856 {
3857         struct trace_array *tr = iter->tr;
3858         struct trace_seq *s = &iter->seq;
3859         struct trace_entry *entry;
3860         struct trace_event *event;
3861
3862         entry = iter->ent;
3863
3864         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3865                 SEQ_PUT_FIELD(s, entry->pid);
3866                 SEQ_PUT_FIELD(s, iter->cpu);
3867                 SEQ_PUT_FIELD(s, iter->ts);
3868                 if (trace_seq_has_overflowed(s))
3869                         return TRACE_TYPE_PARTIAL_LINE;
3870         }
3871
3872         event = ftrace_find_event(entry->type);
3873         return event ? event->funcs->binary(iter, 0, event) :
3874                 TRACE_TYPE_HANDLED;
3875 }
3876
3877 int trace_empty(struct trace_iterator *iter)
3878 {
3879         struct ring_buffer_iter *buf_iter;
3880         int cpu;
3881
3882         /* If we are looking at one CPU buffer, only check that one */
3883         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3884                 cpu = iter->cpu_file;
3885                 buf_iter = trace_buffer_iter(iter, cpu);
3886                 if (buf_iter) {
3887                         if (!ring_buffer_iter_empty(buf_iter))
3888                                 return 0;
3889                 } else {
3890                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3891                                 return 0;
3892                 }
3893                 return 1;
3894         }
3895
3896         for_each_tracing_cpu(cpu) {
3897                 buf_iter = trace_buffer_iter(iter, cpu);
3898                 if (buf_iter) {
3899                         if (!ring_buffer_iter_empty(buf_iter))
3900                                 return 0;
3901                 } else {
3902                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3903                                 return 0;
3904                 }
3905         }
3906
3907         return 1;
3908 }
3909
3910 /*  Called with trace_event_read_lock() held. */
3911 enum print_line_t print_trace_line(struct trace_iterator *iter)
3912 {
3913         struct trace_array *tr = iter->tr;
3914         unsigned long trace_flags = tr->trace_flags;
3915         enum print_line_t ret;
3916
3917         if (iter->lost_events) {
3918                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3919                                  iter->cpu, iter->lost_events);
3920                 if (trace_seq_has_overflowed(&iter->seq))
3921                         return TRACE_TYPE_PARTIAL_LINE;
3922         }
3923
3924         if (iter->trace && iter->trace->print_line) {
3925                 ret = iter->trace->print_line(iter);
3926                 if (ret != TRACE_TYPE_UNHANDLED)
3927                         return ret;
3928         }
3929
3930         if (iter->ent->type == TRACE_BPUTS &&
3931                         trace_flags & TRACE_ITER_PRINTK &&
3932                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3933                 return trace_print_bputs_msg_only(iter);
3934
3935         if (iter->ent->type == TRACE_BPRINT &&
3936                         trace_flags & TRACE_ITER_PRINTK &&
3937                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3938                 return trace_print_bprintk_msg_only(iter);
3939
3940         if (iter->ent->type == TRACE_PRINT &&
3941                         trace_flags & TRACE_ITER_PRINTK &&
3942                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3943                 return trace_print_printk_msg_only(iter);
3944
3945         if (trace_flags & TRACE_ITER_BIN)
3946                 return print_bin_fmt(iter);
3947
3948         if (trace_flags & TRACE_ITER_HEX)
3949                 return print_hex_fmt(iter);
3950
3951         if (trace_flags & TRACE_ITER_RAW)
3952                 return print_raw_fmt(iter);
3953
3954         return print_trace_fmt(iter);
3955 }
3956
3957 void trace_latency_header(struct seq_file *m)
3958 {
3959         struct trace_iterator *iter = m->private;
3960         struct trace_array *tr = iter->tr;
3961
3962         /* print nothing if the buffers are empty */
3963         if (trace_empty(iter))
3964                 return;
3965
3966         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3967                 print_trace_header(m, iter);
3968
3969         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3970                 print_lat_help_header(m);
3971 }
3972
3973 void trace_default_header(struct seq_file *m)
3974 {
3975         struct trace_iterator *iter = m->private;
3976         struct trace_array *tr = iter->tr;
3977         unsigned long trace_flags = tr->trace_flags;
3978
3979         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3980                 return;
3981
3982         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3983                 /* print nothing if the buffers are empty */
3984                 if (trace_empty(iter))
3985                         return;
3986                 print_trace_header(m, iter);
3987                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3988                         print_lat_help_header(m);
3989         } else {
3990                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3991                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3992                                 print_func_help_header_irq(iter->trace_buffer,
3993                                                            m, trace_flags);
3994                         else
3995                                 print_func_help_header(iter->trace_buffer, m,
3996                                                        trace_flags);
3997                 }
3998         }
3999 }
4000
4001 static void test_ftrace_alive(struct seq_file *m)
4002 {
4003         if (!ftrace_is_dead())
4004                 return;
4005         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4006                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4007 }
4008
4009 #ifdef CONFIG_TRACER_MAX_TRACE
4010 static void show_snapshot_main_help(struct seq_file *m)
4011 {
4012         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4013                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4014                     "#                      Takes a snapshot of the main buffer.\n"
4015                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4016                     "#                      (Doesn't have to be '2' works with any number that\n"
4017                     "#                       is not a '0' or '1')\n");
4018 }
4019
4020 static void show_snapshot_percpu_help(struct seq_file *m)
4021 {
4022         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4023 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4024         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4025                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4026 #else
4027         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4028                     "#                     Must use main snapshot file to allocate.\n");
4029 #endif
4030         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4031                     "#                      (Doesn't have to be '2' works with any number that\n"
4032                     "#                       is not a '0' or '1')\n");
4033 }
4034
4035 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4036 {
4037         if (iter->tr->allocated_snapshot)
4038                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4039         else
4040                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4041
4042         seq_puts(m, "# Snapshot commands:\n");
4043         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4044                 show_snapshot_main_help(m);
4045         else
4046                 show_snapshot_percpu_help(m);
4047 }
4048 #else
4049 /* Should never be called */
4050 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4051 #endif
4052
4053 static int s_show(struct seq_file *m, void *v)
4054 {
4055         struct trace_iterator *iter = v;
4056         int ret;
4057
4058         if (iter->ent == NULL) {
4059                 if (iter->tr) {
4060                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4061                         seq_puts(m, "#\n");
4062                         test_ftrace_alive(m);
4063                 }
4064                 if (iter->snapshot && trace_empty(iter))
4065                         print_snapshot_help(m, iter);
4066                 else if (iter->trace && iter->trace->print_header)
4067                         iter->trace->print_header(m);
4068                 else
4069                         trace_default_header(m);
4070
4071         } else if (iter->leftover) {
4072                 /*
4073                  * If we filled the seq_file buffer earlier, we
4074                  * want to just show it now.
4075                  */
4076                 ret = trace_print_seq(m, &iter->seq);
4077
4078                 /* ret should this time be zero, but you never know */
4079                 iter->leftover = ret;
4080
4081         } else {
4082                 print_trace_line(iter);
4083                 ret = trace_print_seq(m, &iter->seq);
4084                 /*
4085                  * If we overflow the seq_file buffer, then it will
4086                  * ask us for this data again at start up.
4087                  * Use that instead.
4088                  *  ret is 0 if seq_file write succeeded.
4089                  *        -1 otherwise.
4090                  */
4091                 iter->leftover = ret;
4092         }
4093
4094         return 0;
4095 }
4096
4097 /*
4098  * Should be used after trace_array_get(), trace_types_lock
4099  * ensures that i_cdev was already initialized.
4100  */
4101 static inline int tracing_get_cpu(struct inode *inode)
4102 {
4103         if (inode->i_cdev) /* See trace_create_cpu_file() */
4104                 return (long)inode->i_cdev - 1;
4105         return RING_BUFFER_ALL_CPUS;
4106 }
4107
4108 static const struct seq_operations tracer_seq_ops = {
4109         .start          = s_start,
4110         .next           = s_next,
4111         .stop           = s_stop,
4112         .show           = s_show,
4113 };
4114
4115 static struct trace_iterator *
4116 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4117 {
4118         struct trace_array *tr = inode->i_private;
4119         struct trace_iterator *iter;
4120         int cpu;
4121
4122         if (tracing_disabled)
4123                 return ERR_PTR(-ENODEV);
4124
4125         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4126         if (!iter)
4127                 return ERR_PTR(-ENOMEM);
4128
4129         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4130                                     GFP_KERNEL);
4131         if (!iter->buffer_iter)
4132                 goto release;
4133
4134         /*
4135          * We make a copy of the current tracer to avoid concurrent
4136          * changes on it while we are reading.
4137          */
4138         mutex_lock(&trace_types_lock);
4139         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4140         if (!iter->trace)
4141                 goto fail;
4142
4143         *iter->trace = *tr->current_trace;
4144
4145         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4146                 goto fail;
4147
4148         iter->tr = tr;
4149
4150 #ifdef CONFIG_TRACER_MAX_TRACE
4151         /* Currently only the top directory has a snapshot */
4152         if (tr->current_trace->print_max || snapshot)
4153                 iter->trace_buffer = &tr->max_buffer;
4154         else
4155 #endif
4156                 iter->trace_buffer = &tr->trace_buffer;
4157         iter->snapshot = snapshot;
4158         iter->pos = -1;
4159         iter->cpu_file = tracing_get_cpu(inode);
4160         mutex_init(&iter->mutex);
4161
4162         /* Notify the tracer early; before we stop tracing. */
4163         if (iter->trace && iter->trace->open)
4164                 iter->trace->open(iter);
4165
4166         /* Annotate start of buffers if we had overruns */
4167         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4168                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4169
4170         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4171         if (trace_clocks[tr->clock_id].in_ns)
4172                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4173
4174         /* stop the trace while dumping if we are not opening "snapshot" */
4175         if (!iter->snapshot)
4176                 tracing_stop_tr(tr);
4177
4178         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4179                 for_each_tracing_cpu(cpu) {
4180                         iter->buffer_iter[cpu] =
4181                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4182                                                          cpu, GFP_KERNEL);
4183                 }
4184                 ring_buffer_read_prepare_sync();
4185                 for_each_tracing_cpu(cpu) {
4186                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4187                         tracing_iter_reset(iter, cpu);
4188                 }
4189         } else {
4190                 cpu = iter->cpu_file;
4191                 iter->buffer_iter[cpu] =
4192                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4193                                                  cpu, GFP_KERNEL);
4194                 ring_buffer_read_prepare_sync();
4195                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4196                 tracing_iter_reset(iter, cpu);
4197         }
4198
4199         mutex_unlock(&trace_types_lock);
4200
4201         return iter;
4202
4203  fail:
4204         mutex_unlock(&trace_types_lock);
4205         kfree(iter->trace);
4206         kfree(iter->buffer_iter);
4207 release:
4208         seq_release_private(inode, file);
4209         return ERR_PTR(-ENOMEM);
4210 }
4211
4212 int tracing_open_generic(struct inode *inode, struct file *filp)
4213 {
4214         int ret;
4215
4216         ret = tracing_check_open_get_tr(NULL);
4217         if (ret)
4218                 return ret;
4219
4220         filp->private_data = inode->i_private;
4221         return 0;
4222 }
4223
4224 bool tracing_is_disabled(void)
4225 {
4226         return (tracing_disabled) ? true: false;
4227 }
4228
4229 /*
4230  * Open and update trace_array ref count.
4231  * Must have the current trace_array passed to it.
4232  */
4233 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4234 {
4235         struct trace_array *tr = inode->i_private;
4236         int ret;
4237
4238         ret = tracing_check_open_get_tr(tr);
4239         if (ret)
4240                 return ret;
4241
4242         filp->private_data = inode->i_private;
4243
4244         return 0;
4245 }
4246
4247 static int tracing_release(struct inode *inode, struct file *file)
4248 {
4249         struct trace_array *tr = inode->i_private;
4250         struct seq_file *m = file->private_data;
4251         struct trace_iterator *iter;
4252         int cpu;
4253
4254         if (!(file->f_mode & FMODE_READ)) {
4255                 trace_array_put(tr);
4256                 return 0;
4257         }
4258
4259         /* Writes do not use seq_file */
4260         iter = m->private;
4261         mutex_lock(&trace_types_lock);
4262
4263         for_each_tracing_cpu(cpu) {
4264                 if (iter->buffer_iter[cpu])
4265                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4266         }
4267
4268         if (iter->trace && iter->trace->close)
4269                 iter->trace->close(iter);
4270
4271         if (!iter->snapshot)
4272                 /* reenable tracing if it was previously enabled */
4273                 tracing_start_tr(tr);
4274
4275         __trace_array_put(tr);
4276
4277         mutex_unlock(&trace_types_lock);
4278
4279         mutex_destroy(&iter->mutex);
4280         free_cpumask_var(iter->started);
4281         kfree(iter->trace);
4282         kfree(iter->buffer_iter);
4283         seq_release_private(inode, file);
4284
4285         return 0;
4286 }
4287
4288 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4289 {
4290         struct trace_array *tr = inode->i_private;
4291
4292         trace_array_put(tr);
4293         return 0;
4294 }
4295
4296 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4297 {
4298         struct trace_array *tr = inode->i_private;
4299
4300         trace_array_put(tr);
4301
4302         return single_release(inode, file);
4303 }
4304
4305 static int tracing_open(struct inode *inode, struct file *file)
4306 {
4307         struct trace_array *tr = inode->i_private;
4308         struct trace_iterator *iter;
4309         int ret;
4310
4311         ret = tracing_check_open_get_tr(tr);
4312         if (ret)
4313                 return ret;
4314
4315         /* If this file was open for write, then erase contents */
4316         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4317                 int cpu = tracing_get_cpu(inode);
4318                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4319
4320 #ifdef CONFIG_TRACER_MAX_TRACE
4321                 if (tr->current_trace->print_max)
4322                         trace_buf = &tr->max_buffer;
4323 #endif
4324
4325                 if (cpu == RING_BUFFER_ALL_CPUS)
4326                         tracing_reset_online_cpus(trace_buf);
4327                 else
4328                         tracing_reset_cpu(trace_buf, cpu);
4329         }
4330
4331         if (file->f_mode & FMODE_READ) {
4332                 iter = __tracing_open(inode, file, false);
4333                 if (IS_ERR(iter))
4334                         ret = PTR_ERR(iter);
4335                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4336                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4337         }
4338
4339         if (ret < 0)
4340                 trace_array_put(tr);
4341
4342         return ret;
4343 }
4344
4345 /*
4346  * Some tracers are not suitable for instance buffers.
4347  * A tracer is always available for the global array (toplevel)
4348  * or if it explicitly states that it is.
4349  */
4350 static bool
4351 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4352 {
4353         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4354 }
4355
4356 /* Find the next tracer that this trace array may use */
4357 static struct tracer *
4358 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4359 {
4360         while (t && !trace_ok_for_array(t, tr))
4361                 t = t->next;
4362
4363         return t;
4364 }
4365
4366 static void *
4367 t_next(struct seq_file *m, void *v, loff_t *pos)
4368 {
4369         struct trace_array *tr = m->private;
4370         struct tracer *t = v;
4371
4372         (*pos)++;
4373
4374         if (t)
4375                 t = get_tracer_for_array(tr, t->next);
4376
4377         return t;
4378 }
4379
4380 static void *t_start(struct seq_file *m, loff_t *pos)
4381 {
4382         struct trace_array *tr = m->private;
4383         struct tracer *t;
4384         loff_t l = 0;
4385
4386         mutex_lock(&trace_types_lock);
4387
4388         t = get_tracer_for_array(tr, trace_types);
4389         for (; t && l < *pos; t = t_next(m, t, &l))
4390                         ;
4391
4392         return t;
4393 }
4394
4395 static void t_stop(struct seq_file *m, void *p)
4396 {
4397         mutex_unlock(&trace_types_lock);
4398 }
4399
4400 static int t_show(struct seq_file *m, void *v)
4401 {
4402         struct tracer *t = v;
4403
4404         if (!t)
4405                 return 0;
4406
4407         seq_puts(m, t->name);
4408         if (t->next)
4409                 seq_putc(m, ' ');
4410         else
4411                 seq_putc(m, '\n');
4412
4413         return 0;
4414 }
4415
4416 static const struct seq_operations show_traces_seq_ops = {
4417         .start          = t_start,
4418         .next           = t_next,
4419         .stop           = t_stop,
4420         .show           = t_show,
4421 };
4422
4423 static int show_traces_open(struct inode *inode, struct file *file)
4424 {
4425         struct trace_array *tr = inode->i_private;
4426         struct seq_file *m;
4427         int ret;
4428
4429         ret = tracing_check_open_get_tr(tr);
4430         if (ret)
4431                 return ret;
4432
4433         ret = seq_open(file, &show_traces_seq_ops);
4434         if (ret) {
4435                 trace_array_put(tr);
4436                 return ret;
4437         }
4438
4439         m = file->private_data;
4440         m->private = tr;
4441
4442         return 0;
4443 }
4444
4445 static int show_traces_release(struct inode *inode, struct file *file)
4446 {
4447         struct trace_array *tr = inode->i_private;
4448
4449         trace_array_put(tr);
4450         return seq_release(inode, file);
4451 }
4452
4453 static ssize_t
4454 tracing_write_stub(struct file *filp, const char __user *ubuf,
4455                    size_t count, loff_t *ppos)
4456 {
4457         return count;
4458 }
4459
4460 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4461 {
4462         int ret;
4463
4464         if (file->f_mode & FMODE_READ)
4465                 ret = seq_lseek(file, offset, whence);
4466         else
4467                 file->f_pos = ret = 0;
4468
4469         return ret;
4470 }
4471
4472 static const struct file_operations tracing_fops = {
4473         .open           = tracing_open,
4474         .read           = seq_read,
4475         .write          = tracing_write_stub,
4476         .llseek         = tracing_lseek,
4477         .release        = tracing_release,
4478 };
4479
4480 static const struct file_operations show_traces_fops = {
4481         .open           = show_traces_open,
4482         .read           = seq_read,
4483         .llseek         = seq_lseek,
4484         .release        = show_traces_release,
4485 };
4486
4487 static ssize_t
4488 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4489                      size_t count, loff_t *ppos)
4490 {
4491         struct trace_array *tr = file_inode(filp)->i_private;
4492         char *mask_str;
4493         int len;
4494
4495         len = snprintf(NULL, 0, "%*pb\n",
4496                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4497         mask_str = kmalloc(len, GFP_KERNEL);
4498         if (!mask_str)
4499                 return -ENOMEM;
4500
4501         len = snprintf(mask_str, len, "%*pb\n",
4502                        cpumask_pr_args(tr->tracing_cpumask));
4503         if (len >= count) {
4504                 count = -EINVAL;
4505                 goto out_err;
4506         }
4507         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4508
4509 out_err:
4510         kfree(mask_str);
4511
4512         return count;
4513 }
4514
4515 static ssize_t
4516 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4517                       size_t count, loff_t *ppos)
4518 {
4519         struct trace_array *tr = file_inode(filp)->i_private;
4520         cpumask_var_t tracing_cpumask_new;
4521         int err, cpu;
4522
4523         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4524                 return -ENOMEM;
4525
4526         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4527         if (err)
4528                 goto err_unlock;
4529
4530         local_irq_disable();
4531         arch_spin_lock(&tr->max_lock);
4532         for_each_tracing_cpu(cpu) {
4533                 /*
4534                  * Increase/decrease the disabled counter if we are
4535                  * about to flip a bit in the cpumask:
4536                  */
4537                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4538                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4539                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4540                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4541                 }
4542                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4543                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4544                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4545                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4546                 }
4547         }
4548         arch_spin_unlock(&tr->max_lock);
4549         local_irq_enable();
4550
4551         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4552         free_cpumask_var(tracing_cpumask_new);
4553
4554         return count;
4555
4556 err_unlock:
4557         free_cpumask_var(tracing_cpumask_new);
4558
4559         return err;
4560 }
4561
4562 static const struct file_operations tracing_cpumask_fops = {
4563         .open           = tracing_open_generic_tr,
4564         .read           = tracing_cpumask_read,
4565         .write          = tracing_cpumask_write,
4566         .release        = tracing_release_generic_tr,
4567         .llseek         = generic_file_llseek,
4568 };
4569
4570 static int tracing_trace_options_show(struct seq_file *m, void *v)
4571 {
4572         struct tracer_opt *trace_opts;
4573         struct trace_array *tr = m->private;
4574         u32 tracer_flags;
4575         int i;
4576
4577         mutex_lock(&trace_types_lock);
4578         tracer_flags = tr->current_trace->flags->val;
4579         trace_opts = tr->current_trace->flags->opts;
4580
4581         for (i = 0; trace_options[i]; i++) {
4582                 if (tr->trace_flags & (1 << i))
4583                         seq_printf(m, "%s\n", trace_options[i]);
4584                 else
4585                         seq_printf(m, "no%s\n", trace_options[i]);
4586         }
4587
4588         for (i = 0; trace_opts[i].name; i++) {
4589                 if (tracer_flags & trace_opts[i].bit)
4590                         seq_printf(m, "%s\n", trace_opts[i].name);
4591                 else
4592                         seq_printf(m, "no%s\n", trace_opts[i].name);
4593         }
4594         mutex_unlock(&trace_types_lock);
4595
4596         return 0;
4597 }
4598
4599 static int __set_tracer_option(struct trace_array *tr,
4600                                struct tracer_flags *tracer_flags,
4601                                struct tracer_opt *opts, int neg)
4602 {
4603         struct tracer *trace = tracer_flags->trace;
4604         int ret;
4605
4606         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4607         if (ret)
4608                 return ret;
4609
4610         if (neg)
4611                 tracer_flags->val &= ~opts->bit;
4612         else
4613                 tracer_flags->val |= opts->bit;
4614         return 0;
4615 }
4616
4617 /* Try to assign a tracer specific option */
4618 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4619 {
4620         struct tracer *trace = tr->current_trace;
4621         struct tracer_flags *tracer_flags = trace->flags;
4622         struct tracer_opt *opts = NULL;
4623         int i;
4624
4625         for (i = 0; tracer_flags->opts[i].name; i++) {
4626                 opts = &tracer_flags->opts[i];
4627
4628                 if (strcmp(cmp, opts->name) == 0)
4629                         return __set_tracer_option(tr, trace->flags, opts, neg);
4630         }
4631
4632         return -EINVAL;
4633 }
4634
4635 /* Some tracers require overwrite to stay enabled */
4636 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4637 {
4638         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4639                 return -1;
4640
4641         return 0;
4642 }
4643
4644 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4645 {
4646         int *map;
4647
4648         if ((mask == TRACE_ITER_RECORD_TGID) ||
4649             (mask == TRACE_ITER_RECORD_CMD))
4650                 lockdep_assert_held(&event_mutex);
4651
4652         /* do nothing if flag is already set */
4653         if (!!(tr->trace_flags & mask) == !!enabled)
4654                 return 0;
4655
4656         /* Give the tracer a chance to approve the change */
4657         if (tr->current_trace->flag_changed)
4658                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4659                         return -EINVAL;
4660
4661         if (enabled)
4662                 tr->trace_flags |= mask;
4663         else
4664                 tr->trace_flags &= ~mask;
4665
4666         if (mask == TRACE_ITER_RECORD_CMD)
4667                 trace_event_enable_cmd_record(enabled);
4668
4669         if (mask == TRACE_ITER_RECORD_TGID) {
4670                 if (!tgid_map) {
4671                         tgid_map_max = pid_max;
4672                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4673                                        GFP_KERNEL);
4674
4675                         /*
4676                          * Pairs with smp_load_acquire() in
4677                          * trace_find_tgid_ptr() to ensure that if it observes
4678                          * the tgid_map we just allocated then it also observes
4679                          * the corresponding tgid_map_max value.
4680                          */
4681                         smp_store_release(&tgid_map, map);
4682                 }
4683                 if (!tgid_map) {
4684                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4685                         return -ENOMEM;
4686                 }
4687
4688                 trace_event_enable_tgid_record(enabled);
4689         }
4690
4691         if (mask == TRACE_ITER_EVENT_FORK)
4692                 trace_event_follow_fork(tr, enabled);
4693
4694         if (mask == TRACE_ITER_FUNC_FORK)
4695                 ftrace_pid_follow_fork(tr, enabled);
4696
4697         if (mask == TRACE_ITER_OVERWRITE) {
4698                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4699 #ifdef CONFIG_TRACER_MAX_TRACE
4700                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4701 #endif
4702         }
4703
4704         if (mask == TRACE_ITER_PRINTK) {
4705                 trace_printk_start_stop_comm(enabled);
4706                 trace_printk_control(enabled);
4707         }
4708
4709         return 0;
4710 }
4711
4712 static int trace_set_options(struct trace_array *tr, char *option)
4713 {
4714         char *cmp;
4715         int neg = 0;
4716         int ret;
4717         size_t orig_len = strlen(option);
4718         int len;
4719
4720         cmp = strstrip(option);
4721
4722         len = str_has_prefix(cmp, "no");
4723         if (len)
4724                 neg = 1;
4725
4726         cmp += len;
4727
4728         mutex_lock(&event_mutex);
4729         mutex_lock(&trace_types_lock);
4730
4731         ret = match_string(trace_options, -1, cmp);
4732         /* If no option could be set, test the specific tracer options */
4733         if (ret < 0)
4734                 ret = set_tracer_option(tr, cmp, neg);
4735         else
4736                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4737
4738         mutex_unlock(&trace_types_lock);
4739         mutex_unlock(&event_mutex);
4740
4741         /*
4742          * If the first trailing whitespace is replaced with '\0' by strstrip,
4743          * turn it back into a space.
4744          */
4745         if (orig_len > strlen(option))
4746                 option[strlen(option)] = ' ';
4747
4748         return ret;
4749 }
4750
4751 static void __init apply_trace_boot_options(void)
4752 {
4753         char *buf = trace_boot_options_buf;
4754         char *option;
4755
4756         while (true) {
4757                 option = strsep(&buf, ",");
4758
4759                 if (!option)
4760                         break;
4761
4762                 if (*option)
4763                         trace_set_options(&global_trace, option);
4764
4765                 /* Put back the comma to allow this to be called again */
4766                 if (buf)
4767                         *(buf - 1) = ',';
4768         }
4769 }
4770
4771 static ssize_t
4772 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4773                         size_t cnt, loff_t *ppos)
4774 {
4775         struct seq_file *m = filp->private_data;
4776         struct trace_array *tr = m->private;
4777         char buf[64];
4778         int ret;
4779
4780         if (cnt >= sizeof(buf))
4781                 return -EINVAL;
4782
4783         if (copy_from_user(buf, ubuf, cnt))
4784                 return -EFAULT;
4785
4786         buf[cnt] = 0;
4787
4788         ret = trace_set_options(tr, buf);
4789         if (ret < 0)
4790                 return ret;
4791
4792         *ppos += cnt;
4793
4794         return cnt;
4795 }
4796
4797 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4798 {
4799         struct trace_array *tr = inode->i_private;
4800         int ret;
4801
4802         ret = tracing_check_open_get_tr(tr);
4803         if (ret)
4804                 return ret;
4805
4806         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4807         if (ret < 0)
4808                 trace_array_put(tr);
4809
4810         return ret;
4811 }
4812
4813 static const struct file_operations tracing_iter_fops = {
4814         .open           = tracing_trace_options_open,
4815         .read           = seq_read,
4816         .llseek         = seq_lseek,
4817         .release        = tracing_single_release_tr,
4818         .write          = tracing_trace_options_write,
4819 };
4820
4821 static const char readme_msg[] =
4822         "tracing mini-HOWTO:\n\n"
4823         "# echo 0 > tracing_on : quick way to disable tracing\n"
4824         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4825         " Important files:\n"
4826         "  trace\t\t\t- The static contents of the buffer\n"
4827         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4828         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4829         "  current_tracer\t- function and latency tracers\n"
4830         "  available_tracers\t- list of configured tracers for current_tracer\n"
4831         "  error_log\t- error log for failed commands (that support it)\n"
4832         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4833         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4834         "  trace_clock\t\t-change the clock used to order events\n"
4835         "       local:   Per cpu clock but may not be synced across CPUs\n"
4836         "      global:   Synced across CPUs but slows tracing down.\n"
4837         "     counter:   Not a clock, but just an increment\n"
4838         "      uptime:   Jiffy counter from time of boot\n"
4839         "        perf:   Same clock that perf events use\n"
4840 #ifdef CONFIG_X86_64
4841         "     x86-tsc:   TSC cycle counter\n"
4842 #endif
4843         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4844         "       delta:   Delta difference against a buffer-wide timestamp\n"
4845         "    absolute:   Absolute (standalone) timestamp\n"
4846         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4847         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4848         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4849         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4850         "\t\t\t  Remove sub-buffer with rmdir\n"
4851         "  trace_options\t\t- Set format or modify how tracing happens\n"
4852         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4853         "\t\t\t  option name\n"
4854         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4855 #ifdef CONFIG_DYNAMIC_FTRACE
4856         "\n  available_filter_functions - list of functions that can be filtered on\n"
4857         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4858         "\t\t\t  functions\n"
4859         "\t     accepts: func_full_name or glob-matching-pattern\n"
4860         "\t     modules: Can select a group via module\n"
4861         "\t      Format: :mod:<module-name>\n"
4862         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4863         "\t    triggers: a command to perform when function is hit\n"
4864         "\t      Format: <function>:<trigger>[:count]\n"
4865         "\t     trigger: traceon, traceoff\n"
4866         "\t\t      enable_event:<system>:<event>\n"
4867         "\t\t      disable_event:<system>:<event>\n"
4868 #ifdef CONFIG_STACKTRACE
4869         "\t\t      stacktrace\n"
4870 #endif
4871 #ifdef CONFIG_TRACER_SNAPSHOT
4872         "\t\t      snapshot\n"
4873 #endif
4874         "\t\t      dump\n"
4875         "\t\t      cpudump\n"
4876         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4877         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4878         "\t     The first one will disable tracing every time do_fault is hit\n"
4879         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4880         "\t       The first time do trap is hit and it disables tracing, the\n"
4881         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4882         "\t       the counter will not decrement. It only decrements when the\n"
4883         "\t       trigger did work\n"
4884         "\t     To remove trigger without count:\n"
4885         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4886         "\t     To remove trigger with a count:\n"
4887         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4888         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4889         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4890         "\t    modules: Can select a group via module command :mod:\n"
4891         "\t    Does not accept triggers\n"
4892 #endif /* CONFIG_DYNAMIC_FTRACE */
4893 #ifdef CONFIG_FUNCTION_TRACER
4894         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4895         "\t\t    (function)\n"
4896 #endif
4897 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4898         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4899         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4900         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4901 #endif
4902 #ifdef CONFIG_TRACER_SNAPSHOT
4903         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4904         "\t\t\t  snapshot buffer. Read the contents for more\n"
4905         "\t\t\t  information\n"
4906 #endif
4907 #ifdef CONFIG_STACK_TRACER
4908         "  stack_trace\t\t- Shows the max stack trace when active\n"
4909         "  stack_max_size\t- Shows current max stack size that was traced\n"
4910         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4911         "\t\t\t  new trace)\n"
4912 #ifdef CONFIG_DYNAMIC_FTRACE
4913         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4914         "\t\t\t  traces\n"
4915 #endif
4916 #endif /* CONFIG_STACK_TRACER */
4917 #ifdef CONFIG_DYNAMIC_EVENTS
4918         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4919         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4920 #endif
4921 #ifdef CONFIG_KPROBE_EVENTS
4922         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4923         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4924 #endif
4925 #ifdef CONFIG_UPROBE_EVENTS
4926         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4927         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4928 #endif
4929 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4930         "\t  accepts: event-definitions (one definition per line)\n"
4931         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4932         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4933 #ifdef CONFIG_HIST_TRIGGERS
4934         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4935 #endif
4936         "\t           -:[<group>/]<event>\n"
4937 #ifdef CONFIG_KPROBE_EVENTS
4938         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4939   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4940 #endif
4941 #ifdef CONFIG_UPROBE_EVENTS
4942   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4943 #endif
4944         "\t     args: <name>=fetcharg[:type]\n"
4945         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4946 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4947         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4948 #else
4949         "\t           $stack<index>, $stack, $retval, $comm,\n"
4950 #endif
4951         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4952         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4953         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4954         "\t           <type>\\[<array-size>\\]\n"
4955 #ifdef CONFIG_HIST_TRIGGERS
4956         "\t    field: <stype> <name>;\n"
4957         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4958         "\t           [unsigned] char/int/long\n"
4959 #endif
4960 #endif
4961         "  events/\t\t- Directory containing all trace event subsystems:\n"
4962         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4963         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4964         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4965         "\t\t\t  events\n"
4966         "      filter\t\t- If set, only events passing filter are traced\n"
4967         "  events/<system>/<event>/\t- Directory containing control files for\n"
4968         "\t\t\t  <event>:\n"
4969         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4970         "      filter\t\t- If set, only events passing filter are traced\n"
4971         "      trigger\t\t- If set, a command to perform when event is hit\n"
4972         "\t    Format: <trigger>[:count][if <filter>]\n"
4973         "\t   trigger: traceon, traceoff\n"
4974         "\t            enable_event:<system>:<event>\n"
4975         "\t            disable_event:<system>:<event>\n"
4976 #ifdef CONFIG_HIST_TRIGGERS
4977         "\t            enable_hist:<system>:<event>\n"
4978         "\t            disable_hist:<system>:<event>\n"
4979 #endif
4980 #ifdef CONFIG_STACKTRACE
4981         "\t\t    stacktrace\n"
4982 #endif
4983 #ifdef CONFIG_TRACER_SNAPSHOT
4984         "\t\t    snapshot\n"
4985 #endif
4986 #ifdef CONFIG_HIST_TRIGGERS
4987         "\t\t    hist (see below)\n"
4988 #endif
4989         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4990         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4991         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4992         "\t                  events/block/block_unplug/trigger\n"
4993         "\t   The first disables tracing every time block_unplug is hit.\n"
4994         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4995         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4996         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4997         "\t   Like function triggers, the counter is only decremented if it\n"
4998         "\t    enabled or disabled tracing.\n"
4999         "\t   To remove a trigger without a count:\n"
5000         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5001         "\t   To remove a trigger with a count:\n"
5002         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5003         "\t   Filters can be ignored when removing a trigger.\n"
5004 #ifdef CONFIG_HIST_TRIGGERS
5005         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5006         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5007         "\t            [:values=<field1[,field2,...]>]\n"
5008         "\t            [:sort=<field1[,field2,...]>]\n"
5009         "\t            [:size=#entries]\n"
5010         "\t            [:pause][:continue][:clear]\n"
5011         "\t            [:name=histname1]\n"
5012         "\t            [:<handler>.<action>]\n"
5013         "\t            [if <filter>]\n\n"
5014         "\t    Note, special fields can be used as well:\n"
5015         "\t            common_timestamp - to record current timestamp\n"
5016         "\t            common_cpu - to record the CPU the event happened on\n"
5017         "\n"
5018         "\t    When a matching event is hit, an entry is added to a hash\n"
5019         "\t    table using the key(s) and value(s) named, and the value of a\n"
5020         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5021         "\t    correspond to fields in the event's format description.  Keys\n"
5022         "\t    can be any field, or the special string 'stacktrace'.\n"
5023         "\t    Compound keys consisting of up to two fields can be specified\n"
5024         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5025         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5026         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5027         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5028         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5029         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5030         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5031         "\t    its histogram data will be shared with other triggers of the\n"
5032         "\t    same name, and trigger hits will update this common data.\n\n"
5033         "\t    Reading the 'hist' file for the event will dump the hash\n"
5034         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5035         "\t    triggers attached to an event, there will be a table for each\n"
5036         "\t    trigger in the output.  The table displayed for a named\n"
5037         "\t    trigger will be the same as any other instance having the\n"
5038         "\t    same name.  The default format used to display a given field\n"
5039         "\t    can be modified by appending any of the following modifiers\n"
5040         "\t    to the field name, as applicable:\n\n"
5041         "\t            .hex        display a number as a hex value\n"
5042         "\t            .sym        display an address as a symbol\n"
5043         "\t            .sym-offset display an address as a symbol and offset\n"
5044         "\t            .execname   display a common_pid as a program name\n"
5045         "\t            .syscall    display a syscall id as a syscall name\n"
5046         "\t            .log2       display log2 value rather than raw number\n"
5047         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5048         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5049         "\t    trigger or to start a hist trigger but not log any events\n"
5050         "\t    until told to do so.  'continue' can be used to start or\n"
5051         "\t    restart a paused hist trigger.\n\n"
5052         "\t    The 'clear' parameter will clear the contents of a running\n"
5053         "\t    hist trigger and leave its current paused/active state\n"
5054         "\t    unchanged.\n\n"
5055         "\t    The enable_hist and disable_hist triggers can be used to\n"
5056         "\t    have one event conditionally start and stop another event's\n"
5057         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5058         "\t    the enable_event and disable_event triggers.\n\n"
5059         "\t    Hist trigger handlers and actions are executed whenever a\n"
5060         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5061         "\t        <handler>.<action>\n\n"
5062         "\t    The available handlers are:\n\n"
5063         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5064         "\t        onmax(var)               - invoke if var exceeds current max\n"
5065         "\t        onchange(var)            - invoke action if var changes\n\n"
5066         "\t    The available actions are:\n\n"
5067         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5068         "\t        save(field,...)                      - save current event fields\n"
5069 #ifdef CONFIG_TRACER_SNAPSHOT
5070         "\t        snapshot()                           - snapshot the trace buffer\n"
5071 #endif
5072 #endif
5073 ;
5074
5075 static ssize_t
5076 tracing_readme_read(struct file *filp, char __user *ubuf,
5077                        size_t cnt, loff_t *ppos)
5078 {
5079         return simple_read_from_buffer(ubuf, cnt, ppos,
5080                                         readme_msg, strlen(readme_msg));
5081 }
5082
5083 static const struct file_operations tracing_readme_fops = {
5084         .open           = tracing_open_generic,
5085         .read           = tracing_readme_read,
5086         .llseek         = generic_file_llseek,
5087 };
5088
5089 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5090 {
5091         int pid = ++(*pos);
5092
5093         return trace_find_tgid_ptr(pid);
5094 }
5095
5096 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5097 {
5098         int pid = *pos;
5099
5100         return trace_find_tgid_ptr(pid);
5101 }
5102
5103 static void saved_tgids_stop(struct seq_file *m, void *v)
5104 {
5105 }
5106
5107 static int saved_tgids_show(struct seq_file *m, void *v)
5108 {
5109         int *entry = (int *)v;
5110         int pid = entry - tgid_map;
5111         int tgid = *entry;
5112
5113         if (tgid == 0)
5114                 return SEQ_SKIP;
5115
5116         seq_printf(m, "%d %d\n", pid, tgid);
5117         return 0;
5118 }
5119
5120 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5121         .start          = saved_tgids_start,
5122         .stop           = saved_tgids_stop,
5123         .next           = saved_tgids_next,
5124         .show           = saved_tgids_show,
5125 };
5126
5127 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5128 {
5129         int ret;
5130
5131         ret = tracing_check_open_get_tr(NULL);
5132         if (ret)
5133                 return ret;
5134
5135         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5136 }
5137
5138
5139 static const struct file_operations tracing_saved_tgids_fops = {
5140         .open           = tracing_saved_tgids_open,
5141         .read           = seq_read,
5142         .llseek         = seq_lseek,
5143         .release        = seq_release,
5144 };
5145
5146 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5147 {
5148         unsigned int *ptr = v;
5149
5150         if (*pos || m->count)
5151                 ptr++;
5152
5153         (*pos)++;
5154
5155         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5156              ptr++) {
5157                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5158                         continue;
5159
5160                 return ptr;
5161         }
5162
5163         return NULL;
5164 }
5165
5166 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5167 {
5168         void *v;
5169         loff_t l = 0;
5170
5171         preempt_disable();
5172         arch_spin_lock(&trace_cmdline_lock);
5173
5174         v = &savedcmd->map_cmdline_to_pid[0];
5175         while (l <= *pos) {
5176                 v = saved_cmdlines_next(m, v, &l);
5177                 if (!v)
5178                         return NULL;
5179         }
5180
5181         return v;
5182 }
5183
5184 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5185 {
5186         arch_spin_unlock(&trace_cmdline_lock);
5187         preempt_enable();
5188 }
5189
5190 static int saved_cmdlines_show(struct seq_file *m, void *v)
5191 {
5192         char buf[TASK_COMM_LEN];
5193         unsigned int *pid = v;
5194
5195         __trace_find_cmdline(*pid, buf);
5196         seq_printf(m, "%d %s\n", *pid, buf);
5197         return 0;
5198 }
5199
5200 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5201         .start          = saved_cmdlines_start,
5202         .next           = saved_cmdlines_next,
5203         .stop           = saved_cmdlines_stop,
5204         .show           = saved_cmdlines_show,
5205 };
5206
5207 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5208 {
5209         int ret;
5210
5211         ret = tracing_check_open_get_tr(NULL);
5212         if (ret)
5213                 return ret;
5214
5215         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5216 }
5217
5218 static const struct file_operations tracing_saved_cmdlines_fops = {
5219         .open           = tracing_saved_cmdlines_open,
5220         .read           = seq_read,
5221         .llseek         = seq_lseek,
5222         .release        = seq_release,
5223 };
5224
5225 static ssize_t
5226 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5227                                  size_t cnt, loff_t *ppos)
5228 {
5229         char buf[64];
5230         int r;
5231
5232         preempt_disable();
5233         arch_spin_lock(&trace_cmdline_lock);
5234         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5235         arch_spin_unlock(&trace_cmdline_lock);
5236         preempt_enable();
5237
5238         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5239 }
5240
5241 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5242 {
5243         kfree(s->saved_cmdlines);
5244         kfree(s->map_cmdline_to_pid);
5245         kfree(s);
5246 }
5247
5248 static int tracing_resize_saved_cmdlines(unsigned int val)
5249 {
5250         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5251
5252         s = kmalloc(sizeof(*s), GFP_KERNEL);
5253         if (!s)
5254                 return -ENOMEM;
5255
5256         if (allocate_cmdlines_buffer(val, s) < 0) {
5257                 kfree(s);
5258                 return -ENOMEM;
5259         }
5260
5261         preempt_disable();
5262         arch_spin_lock(&trace_cmdline_lock);
5263         savedcmd_temp = savedcmd;
5264         savedcmd = s;
5265         arch_spin_unlock(&trace_cmdline_lock);
5266         preempt_enable();
5267         free_saved_cmdlines_buffer(savedcmd_temp);
5268
5269         return 0;
5270 }
5271
5272 static ssize_t
5273 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5274                                   size_t cnt, loff_t *ppos)
5275 {
5276         unsigned long val;
5277         int ret;
5278
5279         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5280         if (ret)
5281                 return ret;
5282
5283         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5284         if (!val || val > PID_MAX_DEFAULT)
5285                 return -EINVAL;
5286
5287         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5288         if (ret < 0)
5289                 return ret;
5290
5291         *ppos += cnt;
5292
5293         return cnt;
5294 }
5295
5296 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5297         .open           = tracing_open_generic,
5298         .read           = tracing_saved_cmdlines_size_read,
5299         .write          = tracing_saved_cmdlines_size_write,
5300 };
5301
5302 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5303 static union trace_eval_map_item *
5304 update_eval_map(union trace_eval_map_item *ptr)
5305 {
5306         if (!ptr->map.eval_string) {
5307                 if (ptr->tail.next) {
5308                         ptr = ptr->tail.next;
5309                         /* Set ptr to the next real item (skip head) */
5310                         ptr++;
5311                 } else
5312                         return NULL;
5313         }
5314         return ptr;
5315 }
5316
5317 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5318 {
5319         union trace_eval_map_item *ptr = v;
5320
5321         /*
5322          * Paranoid! If ptr points to end, we don't want to increment past it.
5323          * This really should never happen.
5324          */
5325         ptr = update_eval_map(ptr);
5326         if (WARN_ON_ONCE(!ptr))
5327                 return NULL;
5328
5329         ptr++;
5330
5331         (*pos)++;
5332
5333         ptr = update_eval_map(ptr);
5334
5335         return ptr;
5336 }
5337
5338 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5339 {
5340         union trace_eval_map_item *v;
5341         loff_t l = 0;
5342
5343         mutex_lock(&trace_eval_mutex);
5344
5345         v = trace_eval_maps;
5346         if (v)
5347                 v++;
5348
5349         while (v && l < *pos) {
5350                 v = eval_map_next(m, v, &l);
5351         }
5352
5353         return v;
5354 }
5355
5356 static void eval_map_stop(struct seq_file *m, void *v)
5357 {
5358         mutex_unlock(&trace_eval_mutex);
5359 }
5360
5361 static int eval_map_show(struct seq_file *m, void *v)
5362 {
5363         union trace_eval_map_item *ptr = v;
5364
5365         seq_printf(m, "%s %ld (%s)\n",
5366                    ptr->map.eval_string, ptr->map.eval_value,
5367                    ptr->map.system);
5368
5369         return 0;
5370 }
5371
5372 static const struct seq_operations tracing_eval_map_seq_ops = {
5373         .start          = eval_map_start,
5374         .next           = eval_map_next,
5375         .stop           = eval_map_stop,
5376         .show           = eval_map_show,
5377 };
5378
5379 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5380 {
5381         int ret;
5382
5383         ret = tracing_check_open_get_tr(NULL);
5384         if (ret)
5385                 return ret;
5386
5387         return seq_open(filp, &tracing_eval_map_seq_ops);
5388 }
5389
5390 static const struct file_operations tracing_eval_map_fops = {
5391         .open           = tracing_eval_map_open,
5392         .read           = seq_read,
5393         .llseek         = seq_lseek,
5394         .release        = seq_release,
5395 };
5396
5397 static inline union trace_eval_map_item *
5398 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5399 {
5400         /* Return tail of array given the head */
5401         return ptr + ptr->head.length + 1;
5402 }
5403
5404 static void
5405 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5406                            int len)
5407 {
5408         struct trace_eval_map **stop;
5409         struct trace_eval_map **map;
5410         union trace_eval_map_item *map_array;
5411         union trace_eval_map_item *ptr;
5412
5413         stop = start + len;
5414
5415         /*
5416          * The trace_eval_maps contains the map plus a head and tail item,
5417          * where the head holds the module and length of array, and the
5418          * tail holds a pointer to the next list.
5419          */
5420         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5421         if (!map_array) {
5422                 pr_warn("Unable to allocate trace eval mapping\n");
5423                 return;
5424         }
5425
5426         mutex_lock(&trace_eval_mutex);
5427
5428         if (!trace_eval_maps)
5429                 trace_eval_maps = map_array;
5430         else {
5431                 ptr = trace_eval_maps;
5432                 for (;;) {
5433                         ptr = trace_eval_jmp_to_tail(ptr);
5434                         if (!ptr->tail.next)
5435                                 break;
5436                         ptr = ptr->tail.next;
5437
5438                 }
5439                 ptr->tail.next = map_array;
5440         }
5441         map_array->head.mod = mod;
5442         map_array->head.length = len;
5443         map_array++;
5444
5445         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5446                 map_array->map = **map;
5447                 map_array++;
5448         }
5449         memset(map_array, 0, sizeof(*map_array));
5450
5451         mutex_unlock(&trace_eval_mutex);
5452 }
5453
5454 static void trace_create_eval_file(struct dentry *d_tracer)
5455 {
5456         trace_create_file("eval_map", 0444, d_tracer,
5457                           NULL, &tracing_eval_map_fops);
5458 }
5459
5460 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5461 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5462 static inline void trace_insert_eval_map_file(struct module *mod,
5463                               struct trace_eval_map **start, int len) { }
5464 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5465
5466 static void trace_insert_eval_map(struct module *mod,
5467                                   struct trace_eval_map **start, int len)
5468 {
5469         struct trace_eval_map **map;
5470
5471         if (len <= 0)
5472                 return;
5473
5474         map = start;
5475
5476         trace_event_eval_update(map, len);
5477
5478         trace_insert_eval_map_file(mod, start, len);
5479 }
5480
5481 static ssize_t
5482 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5483                        size_t cnt, loff_t *ppos)
5484 {
5485         struct trace_array *tr = filp->private_data;
5486         char buf[MAX_TRACER_SIZE+2];
5487         int r;
5488
5489         mutex_lock(&trace_types_lock);
5490         r = sprintf(buf, "%s\n", tr->current_trace->name);
5491         mutex_unlock(&trace_types_lock);
5492
5493         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5494 }
5495
5496 int tracer_init(struct tracer *t, struct trace_array *tr)
5497 {
5498         tracing_reset_online_cpus(&tr->trace_buffer);
5499         return t->init(tr);
5500 }
5501
5502 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5503 {
5504         int cpu;
5505
5506         for_each_tracing_cpu(cpu)
5507                 per_cpu_ptr(buf->data, cpu)->entries = val;
5508 }
5509
5510 #ifdef CONFIG_TRACER_MAX_TRACE
5511 /* resize @tr's buffer to the size of @size_tr's entries */
5512 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5513                                         struct trace_buffer *size_buf, int cpu_id)
5514 {
5515         int cpu, ret = 0;
5516
5517         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5518                 for_each_tracing_cpu(cpu) {
5519                         ret = ring_buffer_resize(trace_buf->buffer,
5520                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5521                         if (ret < 0)
5522                                 break;
5523                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5524                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5525                 }
5526         } else {
5527                 ret = ring_buffer_resize(trace_buf->buffer,
5528                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5529                 if (ret == 0)
5530                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5531                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5532         }
5533
5534         return ret;
5535 }
5536 #endif /* CONFIG_TRACER_MAX_TRACE */
5537
5538 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5539                                         unsigned long size, int cpu)
5540 {
5541         int ret;
5542
5543         /*
5544          * If kernel or user changes the size of the ring buffer
5545          * we use the size that was given, and we can forget about
5546          * expanding it later.
5547          */
5548         ring_buffer_expanded = true;
5549
5550         /* May be called before buffers are initialized */
5551         if (!tr->trace_buffer.buffer)
5552                 return 0;
5553
5554         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5555         if (ret < 0)
5556                 return ret;
5557
5558 #ifdef CONFIG_TRACER_MAX_TRACE
5559         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5560             !tr->current_trace->use_max_tr)
5561                 goto out;
5562
5563         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5564         if (ret < 0) {
5565                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5566                                                      &tr->trace_buffer, cpu);
5567                 if (r < 0) {
5568                         /*
5569                          * AARGH! We are left with different
5570                          * size max buffer!!!!
5571                          * The max buffer is our "snapshot" buffer.
5572                          * When a tracer needs a snapshot (one of the
5573                          * latency tracers), it swaps the max buffer
5574                          * with the saved snap shot. We succeeded to
5575                          * update the size of the main buffer, but failed to
5576                          * update the size of the max buffer. But when we tried
5577                          * to reset the main buffer to the original size, we
5578                          * failed there too. This is very unlikely to
5579                          * happen, but if it does, warn and kill all
5580                          * tracing.
5581                          */
5582                         WARN_ON(1);
5583                         tracing_disabled = 1;
5584                 }
5585                 return ret;
5586         }
5587
5588         if (cpu == RING_BUFFER_ALL_CPUS)
5589                 set_buffer_entries(&tr->max_buffer, size);
5590         else
5591                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5592
5593  out:
5594 #endif /* CONFIG_TRACER_MAX_TRACE */
5595
5596         if (cpu == RING_BUFFER_ALL_CPUS)
5597                 set_buffer_entries(&tr->trace_buffer, size);
5598         else
5599                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5600
5601         return ret;
5602 }
5603
5604 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5605                                           unsigned long size, int cpu_id)
5606 {
5607         int ret = size;
5608
5609         mutex_lock(&trace_types_lock);
5610
5611         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5612                 /* make sure, this cpu is enabled in the mask */
5613                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5614                         ret = -EINVAL;
5615                         goto out;
5616                 }
5617         }
5618
5619         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5620         if (ret < 0)
5621                 ret = -ENOMEM;
5622
5623 out:
5624         mutex_unlock(&trace_types_lock);
5625
5626         return ret;
5627 }
5628
5629
5630 /**
5631  * tracing_update_buffers - used by tracing facility to expand ring buffers
5632  *
5633  * To save on memory when the tracing is never used on a system with it
5634  * configured in. The ring buffers are set to a minimum size. But once
5635  * a user starts to use the tracing facility, then they need to grow
5636  * to their default size.
5637  *
5638  * This function is to be called when a tracer is about to be used.
5639  */
5640 int tracing_update_buffers(void)
5641 {
5642         int ret = 0;
5643
5644         mutex_lock(&trace_types_lock);
5645         if (!ring_buffer_expanded)
5646                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5647                                                 RING_BUFFER_ALL_CPUS);
5648         mutex_unlock(&trace_types_lock);
5649
5650         return ret;
5651 }
5652
5653 struct trace_option_dentry;
5654
5655 static void
5656 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5657
5658 /*
5659  * Used to clear out the tracer before deletion of an instance.
5660  * Must have trace_types_lock held.
5661  */
5662 static void tracing_set_nop(struct trace_array *tr)
5663 {
5664         if (tr->current_trace == &nop_trace)
5665                 return;
5666         
5667         tr->current_trace->enabled--;
5668
5669         if (tr->current_trace->reset)
5670                 tr->current_trace->reset(tr);
5671
5672         tr->current_trace = &nop_trace;
5673 }
5674
5675 static bool tracer_options_updated;
5676
5677 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5678 {
5679         /* Only enable if the directory has been created already. */
5680         if (!tr->dir)
5681                 return;
5682
5683         /* Only create trace option files after update_tracer_options finish */
5684         if (!tracer_options_updated)
5685                 return;
5686
5687         create_trace_option_files(tr, t);
5688 }
5689
5690 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5691 {
5692         struct tracer *t;
5693 #ifdef CONFIG_TRACER_MAX_TRACE
5694         bool had_max_tr;
5695 #endif
5696         int ret = 0;
5697
5698         mutex_lock(&trace_types_lock);
5699
5700         if (!ring_buffer_expanded) {
5701                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5702                                                 RING_BUFFER_ALL_CPUS);
5703                 if (ret < 0)
5704                         goto out;
5705                 ret = 0;
5706         }
5707
5708         for (t = trace_types; t; t = t->next) {
5709                 if (strcmp(t->name, buf) == 0)
5710                         break;
5711         }
5712         if (!t) {
5713                 ret = -EINVAL;
5714                 goto out;
5715         }
5716         if (t == tr->current_trace)
5717                 goto out;
5718
5719 #ifdef CONFIG_TRACER_SNAPSHOT
5720         if (t->use_max_tr) {
5721                 local_irq_disable();
5722                 arch_spin_lock(&tr->max_lock);
5723                 if (tr->cond_snapshot)
5724                         ret = -EBUSY;
5725                 arch_spin_unlock(&tr->max_lock);
5726                 local_irq_enable();
5727                 if (ret)
5728                         goto out;
5729         }
5730 #endif
5731         /* Some tracers won't work on kernel command line */
5732         if (system_state < SYSTEM_RUNNING && t->noboot) {
5733                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5734                         t->name);
5735                 goto out;
5736         }
5737
5738         /* Some tracers are only allowed for the top level buffer */
5739         if (!trace_ok_for_array(t, tr)) {
5740                 ret = -EINVAL;
5741                 goto out;
5742         }
5743
5744         /* If trace pipe files are being read, we can't change the tracer */
5745         if (tr->trace_ref) {
5746                 ret = -EBUSY;
5747                 goto out;
5748         }
5749
5750         trace_branch_disable();
5751
5752         tr->current_trace->enabled--;
5753
5754         if (tr->current_trace->reset)
5755                 tr->current_trace->reset(tr);
5756
5757         /* Current trace needs to be nop_trace before synchronize_rcu */
5758         tr->current_trace = &nop_trace;
5759
5760 #ifdef CONFIG_TRACER_MAX_TRACE
5761         had_max_tr = tr->allocated_snapshot;
5762
5763         if (had_max_tr && !t->use_max_tr) {
5764                 /*
5765                  * We need to make sure that the update_max_tr sees that
5766                  * current_trace changed to nop_trace to keep it from
5767                  * swapping the buffers after we resize it.
5768                  * The update_max_tr is called from interrupts disabled
5769                  * so a synchronized_sched() is sufficient.
5770                  */
5771                 synchronize_rcu();
5772                 free_snapshot(tr);
5773         }
5774 #endif
5775
5776 #ifdef CONFIG_TRACER_MAX_TRACE
5777         if (t->use_max_tr && !had_max_tr) {
5778                 ret = tracing_alloc_snapshot_instance(tr);
5779                 if (ret < 0)
5780                         goto out;
5781         }
5782 #endif
5783
5784         if (t->init) {
5785                 ret = tracer_init(t, tr);
5786                 if (ret)
5787                         goto out;
5788         }
5789
5790         tr->current_trace = t;
5791         tr->current_trace->enabled++;
5792         trace_branch_enable(tr);
5793  out:
5794         mutex_unlock(&trace_types_lock);
5795
5796         return ret;
5797 }
5798
5799 static ssize_t
5800 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5801                         size_t cnt, loff_t *ppos)
5802 {
5803         struct trace_array *tr = filp->private_data;
5804         char buf[MAX_TRACER_SIZE+1];
5805         int i;
5806         size_t ret;
5807         int err;
5808
5809         ret = cnt;
5810
5811         if (cnt > MAX_TRACER_SIZE)
5812                 cnt = MAX_TRACER_SIZE;
5813
5814         if (copy_from_user(buf, ubuf, cnt))
5815                 return -EFAULT;
5816
5817         buf[cnt] = 0;
5818
5819         /* strip ending whitespace. */
5820         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5821                 buf[i] = 0;
5822
5823         err = tracing_set_tracer(tr, buf);
5824         if (err)
5825                 return err;
5826
5827         *ppos += ret;
5828
5829         return ret;
5830 }
5831
5832 static ssize_t
5833 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5834                    size_t cnt, loff_t *ppos)
5835 {
5836         char buf[64];
5837         int r;
5838
5839         r = snprintf(buf, sizeof(buf), "%ld\n",
5840                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5841         if (r > sizeof(buf))
5842                 r = sizeof(buf);
5843         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5844 }
5845
5846 static ssize_t
5847 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5848                     size_t cnt, loff_t *ppos)
5849 {
5850         unsigned long val;
5851         int ret;
5852
5853         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5854         if (ret)
5855                 return ret;
5856
5857         *ptr = val * 1000;
5858
5859         return cnt;
5860 }
5861
5862 static ssize_t
5863 tracing_thresh_read(struct file *filp, char __user *ubuf,
5864                     size_t cnt, loff_t *ppos)
5865 {
5866         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5867 }
5868
5869 static ssize_t
5870 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5871                      size_t cnt, loff_t *ppos)
5872 {
5873         struct trace_array *tr = filp->private_data;
5874         int ret;
5875
5876         mutex_lock(&trace_types_lock);
5877         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5878         if (ret < 0)
5879                 goto out;
5880
5881         if (tr->current_trace->update_thresh) {
5882                 ret = tr->current_trace->update_thresh(tr);
5883                 if (ret < 0)
5884                         goto out;
5885         }
5886
5887         ret = cnt;
5888 out:
5889         mutex_unlock(&trace_types_lock);
5890
5891         return ret;
5892 }
5893
5894 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5895
5896 static ssize_t
5897 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5898                      size_t cnt, loff_t *ppos)
5899 {
5900         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5901 }
5902
5903 static ssize_t
5904 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5905                       size_t cnt, loff_t *ppos)
5906 {
5907         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5908 }
5909
5910 #endif
5911
5912 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5913 {
5914         struct trace_array *tr = inode->i_private;
5915         struct trace_iterator *iter;
5916         int ret;
5917
5918         ret = tracing_check_open_get_tr(tr);
5919         if (ret)
5920                 return ret;
5921
5922         mutex_lock(&trace_types_lock);
5923
5924         /* create a buffer to store the information to pass to userspace */
5925         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5926         if (!iter) {
5927                 ret = -ENOMEM;
5928                 __trace_array_put(tr);
5929                 goto out;
5930         }
5931
5932         trace_seq_init(&iter->seq);
5933         iter->trace = tr->current_trace;
5934
5935         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5936                 ret = -ENOMEM;
5937                 goto fail;
5938         }
5939
5940         /* trace pipe does not show start of buffer */
5941         cpumask_setall(iter->started);
5942
5943         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5944                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5945
5946         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5947         if (trace_clocks[tr->clock_id].in_ns)
5948                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5949
5950         iter->tr = tr;
5951         iter->trace_buffer = &tr->trace_buffer;
5952         iter->cpu_file = tracing_get_cpu(inode);
5953         mutex_init(&iter->mutex);
5954         filp->private_data = iter;
5955
5956         if (iter->trace->pipe_open)
5957                 iter->trace->pipe_open(iter);
5958
5959         nonseekable_open(inode, filp);
5960
5961         tr->trace_ref++;
5962 out:
5963         mutex_unlock(&trace_types_lock);
5964         return ret;
5965
5966 fail:
5967         kfree(iter);
5968         __trace_array_put(tr);
5969         mutex_unlock(&trace_types_lock);
5970         return ret;
5971 }
5972
5973 static int tracing_release_pipe(struct inode *inode, struct file *file)
5974 {
5975         struct trace_iterator *iter = file->private_data;
5976         struct trace_array *tr = inode->i_private;
5977
5978         mutex_lock(&trace_types_lock);
5979
5980         tr->trace_ref--;
5981
5982         if (iter->trace->pipe_close)
5983                 iter->trace->pipe_close(iter);
5984
5985         mutex_unlock(&trace_types_lock);
5986
5987         free_cpumask_var(iter->started);
5988         mutex_destroy(&iter->mutex);
5989         kfree(iter);
5990
5991         trace_array_put(tr);
5992
5993         return 0;
5994 }
5995
5996 static __poll_t
5997 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5998 {
5999         struct trace_array *tr = iter->tr;
6000
6001         /* Iterators are static, they should be filled or empty */
6002         if (trace_buffer_iter(iter, iter->cpu_file))
6003                 return EPOLLIN | EPOLLRDNORM;
6004
6005         if (tr->trace_flags & TRACE_ITER_BLOCK)
6006                 /*
6007                  * Always select as readable when in blocking mode
6008                  */
6009                 return EPOLLIN | EPOLLRDNORM;
6010         else
6011                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6012                                              filp, poll_table, iter->tr->buffer_percent);
6013 }
6014
6015 static __poll_t
6016 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6017 {
6018         struct trace_iterator *iter = filp->private_data;
6019
6020         return trace_poll(iter, filp, poll_table);
6021 }
6022
6023 /* Must be called with iter->mutex held. */
6024 static int tracing_wait_pipe(struct file *filp)
6025 {
6026         struct trace_iterator *iter = filp->private_data;
6027         int ret;
6028
6029         while (trace_empty(iter)) {
6030
6031                 if ((filp->f_flags & O_NONBLOCK)) {
6032                         return -EAGAIN;
6033                 }
6034
6035                 /*
6036                  * We block until we read something and tracing is disabled.
6037                  * We still block if tracing is disabled, but we have never
6038                  * read anything. This allows a user to cat this file, and
6039                  * then enable tracing. But after we have read something,
6040                  * we give an EOF when tracing is again disabled.
6041                  *
6042                  * iter->pos will be 0 if we haven't read anything.
6043                  */
6044                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6045                         break;
6046
6047                 mutex_unlock(&iter->mutex);
6048
6049                 ret = wait_on_pipe(iter, 0);
6050
6051                 mutex_lock(&iter->mutex);
6052
6053                 if (ret)
6054                         return ret;
6055         }
6056
6057         return 1;
6058 }
6059
6060 /*
6061  * Consumer reader.
6062  */
6063 static ssize_t
6064 tracing_read_pipe(struct file *filp, char __user *ubuf,
6065                   size_t cnt, loff_t *ppos)
6066 {
6067         struct trace_iterator *iter = filp->private_data;
6068         ssize_t sret;
6069
6070         /*
6071          * Avoid more than one consumer on a single file descriptor
6072          * This is just a matter of traces coherency, the ring buffer itself
6073          * is protected.
6074          */
6075         mutex_lock(&iter->mutex);
6076
6077         /* return any leftover data */
6078         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6079         if (sret != -EBUSY)
6080                 goto out;
6081
6082         trace_seq_init(&iter->seq);
6083
6084         if (iter->trace->read) {
6085                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6086                 if (sret)
6087                         goto out;
6088         }
6089
6090 waitagain:
6091         sret = tracing_wait_pipe(filp);
6092         if (sret <= 0)
6093                 goto out;
6094
6095         /* stop when tracing is finished */
6096         if (trace_empty(iter)) {
6097                 sret = 0;
6098                 goto out;
6099         }
6100
6101         if (cnt >= PAGE_SIZE)
6102                 cnt = PAGE_SIZE - 1;
6103
6104         /* reset all but tr, trace, and overruns */
6105         memset(&iter->seq, 0,
6106                sizeof(struct trace_iterator) -
6107                offsetof(struct trace_iterator, seq));
6108         cpumask_clear(iter->started);
6109         trace_seq_init(&iter->seq);
6110         iter->pos = -1;
6111
6112         trace_event_read_lock();
6113         trace_access_lock(iter->cpu_file);
6114         while (trace_find_next_entry_inc(iter) != NULL) {
6115                 enum print_line_t ret;
6116                 int save_len = iter->seq.seq.len;
6117
6118                 ret = print_trace_line(iter);
6119                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6120                         /*
6121                          * If one print_trace_line() fills entire trace_seq in one shot,
6122                          * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6123                          * In this case, we need to consume it, otherwise, loop will peek
6124                          * this event next time, resulting in an infinite loop.
6125                          */
6126                         if (save_len == 0) {
6127                                 iter->seq.full = 0;
6128                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6129                                 trace_consume(iter);
6130                                 break;
6131                         }
6132
6133                         /* In other cases, don't print partial lines */
6134                         iter->seq.seq.len = save_len;
6135                         break;
6136                 }
6137                 if (ret != TRACE_TYPE_NO_CONSUME)
6138                         trace_consume(iter);
6139
6140                 if (trace_seq_used(&iter->seq) >= cnt)
6141                         break;
6142
6143                 /*
6144                  * Setting the full flag means we reached the trace_seq buffer
6145                  * size and we should leave by partial output condition above.
6146                  * One of the trace_seq_* functions is not used properly.
6147                  */
6148                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6149                           iter->ent->type);
6150         }
6151         trace_access_unlock(iter->cpu_file);
6152         trace_event_read_unlock();
6153
6154         /* Now copy what we have to the user */
6155         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6156         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6157                 trace_seq_init(&iter->seq);
6158
6159         /*
6160          * If there was nothing to send to user, in spite of consuming trace
6161          * entries, go back to wait for more entries.
6162          */
6163         if (sret == -EBUSY)
6164                 goto waitagain;
6165
6166 out:
6167         mutex_unlock(&iter->mutex);
6168
6169         return sret;
6170 }
6171
6172 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6173                                      unsigned int idx)
6174 {
6175         __free_page(spd->pages[idx]);
6176 }
6177
6178 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6179         .confirm                = generic_pipe_buf_confirm,
6180         .release                = generic_pipe_buf_release,
6181         .steal                  = generic_pipe_buf_steal,
6182         .get                    = generic_pipe_buf_get,
6183 };
6184
6185 static size_t
6186 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6187 {
6188         size_t count;
6189         int save_len;
6190         int ret;
6191
6192         /* Seq buffer is page-sized, exactly what we need. */
6193         for (;;) {
6194                 save_len = iter->seq.seq.len;
6195                 ret = print_trace_line(iter);
6196
6197                 if (trace_seq_has_overflowed(&iter->seq)) {
6198                         iter->seq.seq.len = save_len;
6199                         break;
6200                 }
6201
6202                 /*
6203                  * This should not be hit, because it should only
6204                  * be set if the iter->seq overflowed. But check it
6205                  * anyway to be safe.
6206                  */
6207                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6208                         iter->seq.seq.len = save_len;
6209                         break;
6210                 }
6211
6212                 count = trace_seq_used(&iter->seq) - save_len;
6213                 if (rem < count) {
6214                         rem = 0;
6215                         iter->seq.seq.len = save_len;
6216                         break;
6217                 }
6218
6219                 if (ret != TRACE_TYPE_NO_CONSUME)
6220                         trace_consume(iter);
6221                 rem -= count;
6222                 if (!trace_find_next_entry_inc(iter))   {
6223                         rem = 0;
6224                         iter->ent = NULL;
6225                         break;
6226                 }
6227         }
6228
6229         return rem;
6230 }
6231
6232 static ssize_t tracing_splice_read_pipe(struct file *filp,
6233                                         loff_t *ppos,
6234                                         struct pipe_inode_info *pipe,
6235                                         size_t len,
6236                                         unsigned int flags)
6237 {
6238         struct page *pages_def[PIPE_DEF_BUFFERS];
6239         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6240         struct trace_iterator *iter = filp->private_data;
6241         struct splice_pipe_desc spd = {
6242                 .pages          = pages_def,
6243                 .partial        = partial_def,
6244                 .nr_pages       = 0, /* This gets updated below. */
6245                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6246                 .ops            = &tracing_pipe_buf_ops,
6247                 .spd_release    = tracing_spd_release_pipe,
6248         };
6249         ssize_t ret;
6250         size_t rem;
6251         unsigned int i;
6252
6253         if (splice_grow_spd(pipe, &spd))
6254                 return -ENOMEM;
6255
6256         mutex_lock(&iter->mutex);
6257
6258         if (iter->trace->splice_read) {
6259                 ret = iter->trace->splice_read(iter, filp,
6260                                                ppos, pipe, len, flags);
6261                 if (ret)
6262                         goto out_err;
6263         }
6264
6265         ret = tracing_wait_pipe(filp);
6266         if (ret <= 0)
6267                 goto out_err;
6268
6269         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6270                 ret = -EFAULT;
6271                 goto out_err;
6272         }
6273
6274         trace_event_read_lock();
6275         trace_access_lock(iter->cpu_file);
6276
6277         /* Fill as many pages as possible. */
6278         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6279                 spd.pages[i] = alloc_page(GFP_KERNEL);
6280                 if (!spd.pages[i])
6281                         break;
6282
6283                 rem = tracing_fill_pipe_page(rem, iter);
6284
6285                 /* Copy the data into the page, so we can start over. */
6286                 ret = trace_seq_to_buffer(&iter->seq,
6287                                           page_address(spd.pages[i]),
6288                                           trace_seq_used(&iter->seq));
6289                 if (ret < 0) {
6290                         __free_page(spd.pages[i]);
6291                         break;
6292                 }
6293                 spd.partial[i].offset = 0;
6294                 spd.partial[i].len = trace_seq_used(&iter->seq);
6295
6296                 trace_seq_init(&iter->seq);
6297         }
6298
6299         trace_access_unlock(iter->cpu_file);
6300         trace_event_read_unlock();
6301         mutex_unlock(&iter->mutex);
6302
6303         spd.nr_pages = i;
6304
6305         if (i)
6306                 ret = splice_to_pipe(pipe, &spd);
6307         else
6308                 ret = 0;
6309 out:
6310         splice_shrink_spd(&spd);
6311         return ret;
6312
6313 out_err:
6314         mutex_unlock(&iter->mutex);
6315         goto out;
6316 }
6317
6318 static ssize_t
6319 tracing_entries_read(struct file *filp, char __user *ubuf,
6320                      size_t cnt, loff_t *ppos)
6321 {
6322         struct inode *inode = file_inode(filp);
6323         struct trace_array *tr = inode->i_private;
6324         int cpu = tracing_get_cpu(inode);
6325         char buf[64];
6326         int r = 0;
6327         ssize_t ret;
6328
6329         mutex_lock(&trace_types_lock);
6330
6331         if (cpu == RING_BUFFER_ALL_CPUS) {
6332                 int cpu, buf_size_same;
6333                 unsigned long size;
6334
6335                 size = 0;
6336                 buf_size_same = 1;
6337                 /* check if all cpu sizes are same */
6338                 for_each_tracing_cpu(cpu) {
6339                         /* fill in the size from first enabled cpu */
6340                         if (size == 0)
6341                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6342                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6343                                 buf_size_same = 0;
6344                                 break;
6345                         }
6346                 }
6347
6348                 if (buf_size_same) {
6349                         if (!ring_buffer_expanded)
6350                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6351                                             size >> 10,
6352                                             trace_buf_size >> 10);
6353                         else
6354                                 r = sprintf(buf, "%lu\n", size >> 10);
6355                 } else
6356                         r = sprintf(buf, "X\n");
6357         } else
6358                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6359
6360         mutex_unlock(&trace_types_lock);
6361
6362         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6363         return ret;
6364 }
6365
6366 static ssize_t
6367 tracing_entries_write(struct file *filp, const char __user *ubuf,
6368                       size_t cnt, loff_t *ppos)
6369 {
6370         struct inode *inode = file_inode(filp);
6371         struct trace_array *tr = inode->i_private;
6372         unsigned long val;
6373         int ret;
6374
6375         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6376         if (ret)
6377                 return ret;
6378
6379         /* must have at least 1 entry */
6380         if (!val)
6381                 return -EINVAL;
6382
6383         /* value is in KB */
6384         val <<= 10;
6385         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6386         if (ret < 0)
6387                 return ret;
6388
6389         *ppos += cnt;
6390
6391         return cnt;
6392 }
6393
6394 static ssize_t
6395 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6396                                 size_t cnt, loff_t *ppos)
6397 {
6398         struct trace_array *tr = filp->private_data;
6399         char buf[64];
6400         int r, cpu;
6401         unsigned long size = 0, expanded_size = 0;
6402
6403         mutex_lock(&trace_types_lock);
6404         for_each_tracing_cpu(cpu) {
6405                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6406                 if (!ring_buffer_expanded)
6407                         expanded_size += trace_buf_size >> 10;
6408         }
6409         if (ring_buffer_expanded)
6410                 r = sprintf(buf, "%lu\n", size);
6411         else
6412                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6413         mutex_unlock(&trace_types_lock);
6414
6415         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6416 }
6417
6418 static ssize_t
6419 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6420                           size_t cnt, loff_t *ppos)
6421 {
6422         /*
6423          * There is no need to read what the user has written, this function
6424          * is just to make sure that there is no error when "echo" is used
6425          */
6426
6427         *ppos += cnt;
6428
6429         return cnt;
6430 }
6431
6432 static int
6433 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6434 {
6435         struct trace_array *tr = inode->i_private;
6436
6437         /* disable tracing ? */
6438         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6439                 tracer_tracing_off(tr);
6440         /* resize the ring buffer to 0 */
6441         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6442
6443         trace_array_put(tr);
6444
6445         return 0;
6446 }
6447
6448 static ssize_t
6449 tracing_mark_write(struct file *filp, const char __user *ubuf,
6450                                         size_t cnt, loff_t *fpos)
6451 {
6452         struct trace_array *tr = filp->private_data;
6453         struct ring_buffer_event *event;
6454         enum event_trigger_type tt = ETT_NONE;
6455         struct ring_buffer *buffer;
6456         struct print_entry *entry;
6457         unsigned long irq_flags;
6458         ssize_t written;
6459         int size;
6460         int len;
6461
6462 /* Used in tracing_mark_raw_write() as well */
6463 #define FAULTED_STR "<faulted>"
6464 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6465
6466         if (tracing_disabled)
6467                 return -EINVAL;
6468
6469         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6470                 return -EINVAL;
6471
6472         if (cnt > TRACE_BUF_SIZE)
6473                 cnt = TRACE_BUF_SIZE;
6474
6475         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6476
6477         local_save_flags(irq_flags);
6478         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6479
6480         /* If less than "<faulted>", then make sure we can still add that */
6481         if (cnt < FAULTED_SIZE)
6482                 size += FAULTED_SIZE - cnt;
6483
6484         buffer = tr->trace_buffer.buffer;
6485         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6486                                             irq_flags, preempt_count());
6487         if (unlikely(!event))
6488                 /* Ring buffer disabled, return as if not open for write */
6489                 return -EBADF;
6490
6491         entry = ring_buffer_event_data(event);
6492         entry->ip = _THIS_IP_;
6493
6494         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6495         if (len) {
6496                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6497                 cnt = FAULTED_SIZE;
6498                 written = -EFAULT;
6499         } else
6500                 written = cnt;
6501         len = cnt;
6502
6503         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6504                 /* do not add \n before testing triggers, but add \0 */
6505                 entry->buf[cnt] = '\0';
6506                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6507         }
6508
6509         if (entry->buf[cnt - 1] != '\n') {
6510                 entry->buf[cnt] = '\n';
6511                 entry->buf[cnt + 1] = '\0';
6512         } else
6513                 entry->buf[cnt] = '\0';
6514
6515         __buffer_unlock_commit(buffer, event);
6516
6517         if (tt)
6518                 event_triggers_post_call(tr->trace_marker_file, tt);
6519
6520         if (written > 0)
6521                 *fpos += written;
6522
6523         return written;
6524 }
6525
6526 /* Limit it for now to 3K (including tag) */
6527 #define RAW_DATA_MAX_SIZE (1024*3)
6528
6529 static ssize_t
6530 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6531                                         size_t cnt, loff_t *fpos)
6532 {
6533         struct trace_array *tr = filp->private_data;
6534         struct ring_buffer_event *event;
6535         struct ring_buffer *buffer;
6536         struct raw_data_entry *entry;
6537         unsigned long irq_flags;
6538         ssize_t written;
6539         int size;
6540         int len;
6541
6542 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6543
6544         if (tracing_disabled)
6545                 return -EINVAL;
6546
6547         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6548                 return -EINVAL;
6549
6550         /* The marker must at least have a tag id */
6551         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6552                 return -EINVAL;
6553
6554         if (cnt > TRACE_BUF_SIZE)
6555                 cnt = TRACE_BUF_SIZE;
6556
6557         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6558
6559         local_save_flags(irq_flags);
6560         size = sizeof(*entry) + cnt;
6561         if (cnt < FAULT_SIZE_ID)
6562                 size += FAULT_SIZE_ID - cnt;
6563
6564         buffer = tr->trace_buffer.buffer;
6565         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6566                                             irq_flags, preempt_count());
6567         if (!event)
6568                 /* Ring buffer disabled, return as if not open for write */
6569                 return -EBADF;
6570
6571         entry = ring_buffer_event_data(event);
6572
6573         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6574         if (len) {
6575                 entry->id = -1;
6576                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6577                 written = -EFAULT;
6578         } else
6579                 written = cnt;
6580
6581         __buffer_unlock_commit(buffer, event);
6582
6583         if (written > 0)
6584                 *fpos += written;
6585
6586         return written;
6587 }
6588
6589 static int tracing_clock_show(struct seq_file *m, void *v)
6590 {
6591         struct trace_array *tr = m->private;
6592         int i;
6593
6594         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6595                 seq_printf(m,
6596                         "%s%s%s%s", i ? " " : "",
6597                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6598                         i == tr->clock_id ? "]" : "");
6599         seq_putc(m, '\n');
6600
6601         return 0;
6602 }
6603
6604 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6605 {
6606         int i;
6607
6608         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6609                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6610                         break;
6611         }
6612         if (i == ARRAY_SIZE(trace_clocks))
6613                 return -EINVAL;
6614
6615         mutex_lock(&trace_types_lock);
6616
6617         tr->clock_id = i;
6618
6619         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6620
6621         /*
6622          * New clock may not be consistent with the previous clock.
6623          * Reset the buffer so that it doesn't have incomparable timestamps.
6624          */
6625         tracing_reset_online_cpus(&tr->trace_buffer);
6626
6627 #ifdef CONFIG_TRACER_MAX_TRACE
6628         if (tr->max_buffer.buffer)
6629                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6630         tracing_reset_online_cpus(&tr->max_buffer);
6631 #endif
6632
6633         mutex_unlock(&trace_types_lock);
6634
6635         return 0;
6636 }
6637
6638 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6639                                    size_t cnt, loff_t *fpos)
6640 {
6641         struct seq_file *m = filp->private_data;
6642         struct trace_array *tr = m->private;
6643         char buf[64];
6644         const char *clockstr;
6645         int ret;
6646
6647         if (cnt >= sizeof(buf))
6648                 return -EINVAL;
6649
6650         if (copy_from_user(buf, ubuf, cnt))
6651                 return -EFAULT;
6652
6653         buf[cnt] = 0;
6654
6655         clockstr = strstrip(buf);
6656
6657         ret = tracing_set_clock(tr, clockstr);
6658         if (ret)
6659                 return ret;
6660
6661         *fpos += cnt;
6662
6663         return cnt;
6664 }
6665
6666 static int tracing_clock_open(struct inode *inode, struct file *file)
6667 {
6668         struct trace_array *tr = inode->i_private;
6669         int ret;
6670
6671         ret = tracing_check_open_get_tr(tr);
6672         if (ret)
6673                 return ret;
6674
6675         ret = single_open(file, tracing_clock_show, inode->i_private);
6676         if (ret < 0)
6677                 trace_array_put(tr);
6678
6679         return ret;
6680 }
6681
6682 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6683 {
6684         struct trace_array *tr = m->private;
6685
6686         mutex_lock(&trace_types_lock);
6687
6688         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6689                 seq_puts(m, "delta [absolute]\n");
6690         else
6691                 seq_puts(m, "[delta] absolute\n");
6692
6693         mutex_unlock(&trace_types_lock);
6694
6695         return 0;
6696 }
6697
6698 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6699 {
6700         struct trace_array *tr = inode->i_private;
6701         int ret;
6702
6703         ret = tracing_check_open_get_tr(tr);
6704         if (ret)
6705                 return ret;
6706
6707         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6708         if (ret < 0)
6709                 trace_array_put(tr);
6710
6711         return ret;
6712 }
6713
6714 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6715 {
6716         int ret = 0;
6717
6718         mutex_lock(&trace_types_lock);
6719
6720         if (abs && tr->time_stamp_abs_ref++)
6721                 goto out;
6722
6723         if (!abs) {
6724                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6725                         ret = -EINVAL;
6726                         goto out;
6727                 }
6728
6729                 if (--tr->time_stamp_abs_ref)
6730                         goto out;
6731         }
6732
6733         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6734
6735 #ifdef CONFIG_TRACER_MAX_TRACE
6736         if (tr->max_buffer.buffer)
6737                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6738 #endif
6739  out:
6740         mutex_unlock(&trace_types_lock);
6741
6742         return ret;
6743 }
6744
6745 struct ftrace_buffer_info {
6746         struct trace_iterator   iter;
6747         void                    *spare;
6748         unsigned int            spare_cpu;
6749         unsigned int            read;
6750 };
6751
6752 #ifdef CONFIG_TRACER_SNAPSHOT
6753 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6754 {
6755         struct trace_array *tr = inode->i_private;
6756         struct trace_iterator *iter;
6757         struct seq_file *m;
6758         int ret;
6759
6760         ret = tracing_check_open_get_tr(tr);
6761         if (ret)
6762                 return ret;
6763
6764         if (file->f_mode & FMODE_READ) {
6765                 iter = __tracing_open(inode, file, true);
6766                 if (IS_ERR(iter))
6767                         ret = PTR_ERR(iter);
6768         } else {
6769                 /* Writes still need the seq_file to hold the private data */
6770                 ret = -ENOMEM;
6771                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6772                 if (!m)
6773                         goto out;
6774                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6775                 if (!iter) {
6776                         kfree(m);
6777                         goto out;
6778                 }
6779                 ret = 0;
6780
6781                 iter->tr = tr;
6782                 iter->trace_buffer = &tr->max_buffer;
6783                 iter->cpu_file = tracing_get_cpu(inode);
6784                 m->private = iter;
6785                 file->private_data = m;
6786         }
6787 out:
6788         if (ret < 0)
6789                 trace_array_put(tr);
6790
6791         return ret;
6792 }
6793
6794 static void tracing_swap_cpu_buffer(void *tr)
6795 {
6796         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
6797 }
6798
6799 static ssize_t
6800 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6801                        loff_t *ppos)
6802 {
6803         struct seq_file *m = filp->private_data;
6804         struct trace_iterator *iter = m->private;
6805         struct trace_array *tr = iter->tr;
6806         unsigned long val;
6807         int ret;
6808
6809         ret = tracing_update_buffers();
6810         if (ret < 0)
6811                 return ret;
6812
6813         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6814         if (ret)
6815                 return ret;
6816
6817         mutex_lock(&trace_types_lock);
6818
6819         if (tr->current_trace->use_max_tr) {
6820                 ret = -EBUSY;
6821                 goto out;
6822         }
6823
6824         local_irq_disable();
6825         arch_spin_lock(&tr->max_lock);
6826         if (tr->cond_snapshot)
6827                 ret = -EBUSY;
6828         arch_spin_unlock(&tr->max_lock);
6829         local_irq_enable();
6830         if (ret)
6831                 goto out;
6832
6833         switch (val) {
6834         case 0:
6835                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6836                         ret = -EINVAL;
6837                         break;
6838                 }
6839                 if (tr->allocated_snapshot)
6840                         free_snapshot(tr);
6841                 break;
6842         case 1:
6843 /* Only allow per-cpu swap if the ring buffer supports it */
6844 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6845                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6846                         ret = -EINVAL;
6847                         break;
6848                 }
6849 #endif
6850                 if (tr->allocated_snapshot)
6851                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6852                                         &tr->trace_buffer, iter->cpu_file);
6853                 else
6854                         ret = tracing_alloc_snapshot_instance(tr);
6855                 if (ret < 0)
6856                         break;
6857                 /* Now, we're going to swap */
6858                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
6859                         local_irq_disable();
6860                         update_max_tr(tr, current, smp_processor_id(), NULL);
6861                         local_irq_enable();
6862                 } else {
6863                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
6864                                                  (void *)tr, 1);
6865                 }
6866                 break;
6867         default:
6868                 if (tr->allocated_snapshot) {
6869                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6870                                 tracing_reset_online_cpus(&tr->max_buffer);
6871                         else
6872                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6873                 }
6874                 break;
6875         }
6876
6877         if (ret >= 0) {
6878                 *ppos += cnt;
6879                 ret = cnt;
6880         }
6881 out:
6882         mutex_unlock(&trace_types_lock);
6883         return ret;
6884 }
6885
6886 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6887 {
6888         struct seq_file *m = file->private_data;
6889         int ret;
6890
6891         ret = tracing_release(inode, file);
6892
6893         if (file->f_mode & FMODE_READ)
6894                 return ret;
6895
6896         /* If write only, the seq_file is just a stub */
6897         if (m)
6898                 kfree(m->private);
6899         kfree(m);
6900
6901         return 0;
6902 }
6903
6904 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6905 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6906                                     size_t count, loff_t *ppos);
6907 static int tracing_buffers_release(struct inode *inode, struct file *file);
6908 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6909                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6910
6911 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6912 {
6913         struct ftrace_buffer_info *info;
6914         int ret;
6915
6916         /* The following checks for tracefs lockdown */
6917         ret = tracing_buffers_open(inode, filp);
6918         if (ret < 0)
6919                 return ret;
6920
6921         info = filp->private_data;
6922
6923         if (info->iter.trace->use_max_tr) {
6924                 tracing_buffers_release(inode, filp);
6925                 return -EBUSY;
6926         }
6927
6928         info->iter.snapshot = true;
6929         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6930
6931         return ret;
6932 }
6933
6934 #endif /* CONFIG_TRACER_SNAPSHOT */
6935
6936
6937 static const struct file_operations tracing_thresh_fops = {
6938         .open           = tracing_open_generic,
6939         .read           = tracing_thresh_read,
6940         .write          = tracing_thresh_write,
6941         .llseek         = generic_file_llseek,
6942 };
6943
6944 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6945 static const struct file_operations tracing_max_lat_fops = {
6946         .open           = tracing_open_generic,
6947         .read           = tracing_max_lat_read,
6948         .write          = tracing_max_lat_write,
6949         .llseek         = generic_file_llseek,
6950 };
6951 #endif
6952
6953 static const struct file_operations set_tracer_fops = {
6954         .open           = tracing_open_generic_tr,
6955         .read           = tracing_set_trace_read,
6956         .write          = tracing_set_trace_write,
6957         .llseek         = generic_file_llseek,
6958         .release        = tracing_release_generic_tr,
6959 };
6960
6961 static const struct file_operations tracing_pipe_fops = {
6962         .open           = tracing_open_pipe,
6963         .poll           = tracing_poll_pipe,
6964         .read           = tracing_read_pipe,
6965         .splice_read    = tracing_splice_read_pipe,
6966         .release        = tracing_release_pipe,
6967         .llseek         = no_llseek,
6968 };
6969
6970 static const struct file_operations tracing_entries_fops = {
6971         .open           = tracing_open_generic_tr,
6972         .read           = tracing_entries_read,
6973         .write          = tracing_entries_write,
6974         .llseek         = generic_file_llseek,
6975         .release        = tracing_release_generic_tr,
6976 };
6977
6978 static const struct file_operations tracing_total_entries_fops = {
6979         .open           = tracing_open_generic_tr,
6980         .read           = tracing_total_entries_read,
6981         .llseek         = generic_file_llseek,
6982         .release        = tracing_release_generic_tr,
6983 };
6984
6985 static const struct file_operations tracing_free_buffer_fops = {
6986         .open           = tracing_open_generic_tr,
6987         .write          = tracing_free_buffer_write,
6988         .release        = tracing_free_buffer_release,
6989 };
6990
6991 static const struct file_operations tracing_mark_fops = {
6992         .open           = tracing_open_generic_tr,
6993         .write          = tracing_mark_write,
6994         .llseek         = generic_file_llseek,
6995         .release        = tracing_release_generic_tr,
6996 };
6997
6998 static const struct file_operations tracing_mark_raw_fops = {
6999         .open           = tracing_open_generic_tr,
7000         .write          = tracing_mark_raw_write,
7001         .llseek         = generic_file_llseek,
7002         .release        = tracing_release_generic_tr,
7003 };
7004
7005 static const struct file_operations trace_clock_fops = {
7006         .open           = tracing_clock_open,
7007         .read           = seq_read,
7008         .llseek         = seq_lseek,
7009         .release        = tracing_single_release_tr,
7010         .write          = tracing_clock_write,
7011 };
7012
7013 static const struct file_operations trace_time_stamp_mode_fops = {
7014         .open           = tracing_time_stamp_mode_open,
7015         .read           = seq_read,
7016         .llseek         = seq_lseek,
7017         .release        = tracing_single_release_tr,
7018 };
7019
7020 #ifdef CONFIG_TRACER_SNAPSHOT
7021 static const struct file_operations snapshot_fops = {
7022         .open           = tracing_snapshot_open,
7023         .read           = seq_read,
7024         .write          = tracing_snapshot_write,
7025         .llseek         = tracing_lseek,
7026         .release        = tracing_snapshot_release,
7027 };
7028
7029 static const struct file_operations snapshot_raw_fops = {
7030         .open           = snapshot_raw_open,
7031         .read           = tracing_buffers_read,
7032         .release        = tracing_buffers_release,
7033         .splice_read    = tracing_buffers_splice_read,
7034         .llseek         = no_llseek,
7035 };
7036
7037 #endif /* CONFIG_TRACER_SNAPSHOT */
7038
7039 #define TRACING_LOG_ERRS_MAX    8
7040 #define TRACING_LOG_LOC_MAX     128
7041
7042 #define CMD_PREFIX "  Command: "
7043
7044 struct err_info {
7045         const char      **errs; /* ptr to loc-specific array of err strings */
7046         u8              type;   /* index into errs -> specific err string */
7047         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7048         u64             ts;
7049 };
7050
7051 struct tracing_log_err {
7052         struct list_head        list;
7053         struct err_info         info;
7054         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7055         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7056 };
7057
7058 static DEFINE_MUTEX(tracing_err_log_lock);
7059
7060 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7061 {
7062         struct tracing_log_err *err;
7063
7064         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7065                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7066                 if (!err)
7067                         err = ERR_PTR(-ENOMEM);
7068                 else
7069                         tr->n_err_log_entries++;
7070
7071                 return err;
7072         }
7073
7074         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7075         list_del(&err->list);
7076
7077         return err;
7078 }
7079
7080 /**
7081  * err_pos - find the position of a string within a command for error careting
7082  * @cmd: The tracing command that caused the error
7083  * @str: The string to position the caret at within @cmd
7084  *
7085  * Finds the position of the first occurence of @str within @cmd.  The
7086  * return value can be passed to tracing_log_err() for caret placement
7087  * within @cmd.
7088  *
7089  * Returns the index within @cmd of the first occurence of @str or 0
7090  * if @str was not found.
7091  */
7092 unsigned int err_pos(char *cmd, const char *str)
7093 {
7094         char *found;
7095
7096         if (WARN_ON(!strlen(cmd)))
7097                 return 0;
7098
7099         found = strstr(cmd, str);
7100         if (found)
7101                 return found - cmd;
7102
7103         return 0;
7104 }
7105
7106 /**
7107  * tracing_log_err - write an error to the tracing error log
7108  * @tr: The associated trace array for the error (NULL for top level array)
7109  * @loc: A string describing where the error occurred
7110  * @cmd: The tracing command that caused the error
7111  * @errs: The array of loc-specific static error strings
7112  * @type: The index into errs[], which produces the specific static err string
7113  * @pos: The position the caret should be placed in the cmd
7114  *
7115  * Writes an error into tracing/error_log of the form:
7116  *
7117  * <loc>: error: <text>
7118  *   Command: <cmd>
7119  *              ^
7120  *
7121  * tracing/error_log is a small log file containing the last
7122  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7123  * unless there has been a tracing error, and the error log can be
7124  * cleared and have its memory freed by writing the empty string in
7125  * truncation mode to it i.e. echo > tracing/error_log.
7126  *
7127  * NOTE: the @errs array along with the @type param are used to
7128  * produce a static error string - this string is not copied and saved
7129  * when the error is logged - only a pointer to it is saved.  See
7130  * existing callers for examples of how static strings are typically
7131  * defined for use with tracing_log_err().
7132  */
7133 void tracing_log_err(struct trace_array *tr,
7134                      const char *loc, const char *cmd,
7135                      const char **errs, u8 type, u8 pos)
7136 {
7137         struct tracing_log_err *err;
7138
7139         if (!tr)
7140                 tr = &global_trace;
7141
7142         mutex_lock(&tracing_err_log_lock);
7143         err = get_tracing_log_err(tr);
7144         if (PTR_ERR(err) == -ENOMEM) {
7145                 mutex_unlock(&tracing_err_log_lock);
7146                 return;
7147         }
7148
7149         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7150         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7151
7152         err->info.errs = errs;
7153         err->info.type = type;
7154         err->info.pos = pos;
7155         err->info.ts = local_clock();
7156
7157         list_add_tail(&err->list, &tr->err_log);
7158         mutex_unlock(&tracing_err_log_lock);
7159 }
7160
7161 static void clear_tracing_err_log(struct trace_array *tr)
7162 {
7163         struct tracing_log_err *err, *next;
7164
7165         mutex_lock(&tracing_err_log_lock);
7166         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7167                 list_del(&err->list);
7168                 kfree(err);
7169         }
7170
7171         tr->n_err_log_entries = 0;
7172         mutex_unlock(&tracing_err_log_lock);
7173 }
7174
7175 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7176 {
7177         struct trace_array *tr = m->private;
7178
7179         mutex_lock(&tracing_err_log_lock);
7180
7181         return seq_list_start(&tr->err_log, *pos);
7182 }
7183
7184 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7185 {
7186         struct trace_array *tr = m->private;
7187
7188         return seq_list_next(v, &tr->err_log, pos);
7189 }
7190
7191 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7192 {
7193         mutex_unlock(&tracing_err_log_lock);
7194 }
7195
7196 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7197 {
7198         u8 i;
7199
7200         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7201                 seq_putc(m, ' ');
7202         for (i = 0; i < pos; i++)
7203                 seq_putc(m, ' ');
7204         seq_puts(m, "^\n");
7205 }
7206
7207 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7208 {
7209         struct tracing_log_err *err = v;
7210
7211         if (err) {
7212                 const char *err_text = err->info.errs[err->info.type];
7213                 u64 sec = err->info.ts;
7214                 u32 nsec;
7215
7216                 nsec = do_div(sec, NSEC_PER_SEC);
7217                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7218                            err->loc, err_text);
7219                 seq_printf(m, "%s", err->cmd);
7220                 tracing_err_log_show_pos(m, err->info.pos);
7221         }
7222
7223         return 0;
7224 }
7225
7226 static const struct seq_operations tracing_err_log_seq_ops = {
7227         .start  = tracing_err_log_seq_start,
7228         .next   = tracing_err_log_seq_next,
7229         .stop   = tracing_err_log_seq_stop,
7230         .show   = tracing_err_log_seq_show
7231 };
7232
7233 static int tracing_err_log_open(struct inode *inode, struct file *file)
7234 {
7235         struct trace_array *tr = inode->i_private;
7236         int ret = 0;
7237
7238         ret = tracing_check_open_get_tr(tr);
7239         if (ret)
7240                 return ret;
7241
7242         /* If this file was opened for write, then erase contents */
7243         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7244                 clear_tracing_err_log(tr);
7245
7246         if (file->f_mode & FMODE_READ) {
7247                 ret = seq_open(file, &tracing_err_log_seq_ops);
7248                 if (!ret) {
7249                         struct seq_file *m = file->private_data;
7250                         m->private = tr;
7251                 } else {
7252                         trace_array_put(tr);
7253                 }
7254         }
7255         return ret;
7256 }
7257
7258 static ssize_t tracing_err_log_write(struct file *file,
7259                                      const char __user *buffer,
7260                                      size_t count, loff_t *ppos)
7261 {
7262         return count;
7263 }
7264
7265 static int tracing_err_log_release(struct inode *inode, struct file *file)
7266 {
7267         struct trace_array *tr = inode->i_private;
7268
7269         trace_array_put(tr);
7270
7271         if (file->f_mode & FMODE_READ)
7272                 seq_release(inode, file);
7273
7274         return 0;
7275 }
7276
7277 static const struct file_operations tracing_err_log_fops = {
7278         .open           = tracing_err_log_open,
7279         .write          = tracing_err_log_write,
7280         .read           = seq_read,
7281         .llseek         = tracing_lseek,
7282         .release        = tracing_err_log_release,
7283 };
7284
7285 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7286 {
7287         struct trace_array *tr = inode->i_private;
7288         struct ftrace_buffer_info *info;
7289         int ret;
7290
7291         ret = tracing_check_open_get_tr(tr);
7292         if (ret)
7293                 return ret;
7294
7295         info = kzalloc(sizeof(*info), GFP_KERNEL);
7296         if (!info) {
7297                 trace_array_put(tr);
7298                 return -ENOMEM;
7299         }
7300
7301         mutex_lock(&trace_types_lock);
7302
7303         info->iter.tr           = tr;
7304         info->iter.cpu_file     = tracing_get_cpu(inode);
7305         info->iter.trace        = tr->current_trace;
7306         info->iter.trace_buffer = &tr->trace_buffer;
7307         info->spare             = NULL;
7308         /* Force reading ring buffer for first read */
7309         info->read              = (unsigned int)-1;
7310
7311         filp->private_data = info;
7312
7313         tr->trace_ref++;
7314
7315         mutex_unlock(&trace_types_lock);
7316
7317         ret = nonseekable_open(inode, filp);
7318         if (ret < 0)
7319                 trace_array_put(tr);
7320
7321         return ret;
7322 }
7323
7324 static __poll_t
7325 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7326 {
7327         struct ftrace_buffer_info *info = filp->private_data;
7328         struct trace_iterator *iter = &info->iter;
7329
7330         return trace_poll(iter, filp, poll_table);
7331 }
7332
7333 static ssize_t
7334 tracing_buffers_read(struct file *filp, char __user *ubuf,
7335                      size_t count, loff_t *ppos)
7336 {
7337         struct ftrace_buffer_info *info = filp->private_data;
7338         struct trace_iterator *iter = &info->iter;
7339         ssize_t ret = 0;
7340         ssize_t size;
7341
7342         if (!count)
7343                 return 0;
7344
7345 #ifdef CONFIG_TRACER_MAX_TRACE
7346         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7347                 return -EBUSY;
7348 #endif
7349
7350         if (!info->spare) {
7351                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7352                                                           iter->cpu_file);
7353                 if (IS_ERR(info->spare)) {
7354                         ret = PTR_ERR(info->spare);
7355                         info->spare = NULL;
7356                 } else {
7357                         info->spare_cpu = iter->cpu_file;
7358                 }
7359         }
7360         if (!info->spare)
7361                 return ret;
7362
7363         /* Do we have previous read data to read? */
7364         if (info->read < PAGE_SIZE)
7365                 goto read;
7366
7367  again:
7368         trace_access_lock(iter->cpu_file);
7369         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7370                                     &info->spare,
7371                                     count,
7372                                     iter->cpu_file, 0);
7373         trace_access_unlock(iter->cpu_file);
7374
7375         if (ret < 0) {
7376                 if (trace_empty(iter)) {
7377                         if ((filp->f_flags & O_NONBLOCK))
7378                                 return -EAGAIN;
7379
7380                         ret = wait_on_pipe(iter, 0);
7381                         if (ret)
7382                                 return ret;
7383
7384                         goto again;
7385                 }
7386                 return 0;
7387         }
7388
7389         info->read = 0;
7390  read:
7391         size = PAGE_SIZE - info->read;
7392         if (size > count)
7393                 size = count;
7394
7395         ret = copy_to_user(ubuf, info->spare + info->read, size);
7396         if (ret == size)
7397                 return -EFAULT;
7398
7399         size -= ret;
7400
7401         *ppos += size;
7402         info->read += size;
7403
7404         return size;
7405 }
7406
7407 static int tracing_buffers_release(struct inode *inode, struct file *file)
7408 {
7409         struct ftrace_buffer_info *info = file->private_data;
7410         struct trace_iterator *iter = &info->iter;
7411
7412         mutex_lock(&trace_types_lock);
7413
7414         iter->tr->trace_ref--;
7415
7416         __trace_array_put(iter->tr);
7417
7418         if (info->spare)
7419                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7420                                            info->spare_cpu, info->spare);
7421         kfree(info);
7422
7423         mutex_unlock(&trace_types_lock);
7424
7425         return 0;
7426 }
7427
7428 struct buffer_ref {
7429         struct ring_buffer      *buffer;
7430         void                    *page;
7431         int                     cpu;
7432         refcount_t              refcount;
7433 };
7434
7435 static void buffer_ref_release(struct buffer_ref *ref)
7436 {
7437         if (!refcount_dec_and_test(&ref->refcount))
7438                 return;
7439         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7440         kfree(ref);
7441 }
7442
7443 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7444                                     struct pipe_buffer *buf)
7445 {
7446         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7447
7448         buffer_ref_release(ref);
7449         buf->private = 0;
7450 }
7451
7452 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7453                                 struct pipe_buffer *buf)
7454 {
7455         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7456
7457         if (refcount_read(&ref->refcount) > INT_MAX/2)
7458                 return false;
7459
7460         refcount_inc(&ref->refcount);
7461         return true;
7462 }
7463
7464 /* Pipe buffer operations for a buffer. */
7465 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7466         .confirm                = generic_pipe_buf_confirm,
7467         .release                = buffer_pipe_buf_release,
7468         .steal                  = generic_pipe_buf_nosteal,
7469         .get                    = buffer_pipe_buf_get,
7470 };
7471
7472 /*
7473  * Callback from splice_to_pipe(), if we need to release some pages
7474  * at the end of the spd in case we error'ed out in filling the pipe.
7475  */
7476 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7477 {
7478         struct buffer_ref *ref =
7479                 (struct buffer_ref *)spd->partial[i].private;
7480
7481         buffer_ref_release(ref);
7482         spd->partial[i].private = 0;
7483 }
7484
7485 static ssize_t
7486 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7487                             struct pipe_inode_info *pipe, size_t len,
7488                             unsigned int flags)
7489 {
7490         struct ftrace_buffer_info *info = file->private_data;
7491         struct trace_iterator *iter = &info->iter;
7492         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7493         struct page *pages_def[PIPE_DEF_BUFFERS];
7494         struct splice_pipe_desc spd = {
7495                 .pages          = pages_def,
7496                 .partial        = partial_def,
7497                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7498                 .ops            = &buffer_pipe_buf_ops,
7499                 .spd_release    = buffer_spd_release,
7500         };
7501         struct buffer_ref *ref;
7502         int entries, i;
7503         ssize_t ret = 0;
7504
7505 #ifdef CONFIG_TRACER_MAX_TRACE
7506         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7507                 return -EBUSY;
7508 #endif
7509
7510         if (*ppos & (PAGE_SIZE - 1))
7511                 return -EINVAL;
7512
7513         if (len & (PAGE_SIZE - 1)) {
7514                 if (len < PAGE_SIZE)
7515                         return -EINVAL;
7516                 len &= PAGE_MASK;
7517         }
7518
7519         if (splice_grow_spd(pipe, &spd))
7520                 return -ENOMEM;
7521
7522  again:
7523         trace_access_lock(iter->cpu_file);
7524         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7525
7526         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7527                 struct page *page;
7528                 int r;
7529
7530                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7531                 if (!ref) {
7532                         ret = -ENOMEM;
7533                         break;
7534                 }
7535
7536                 refcount_set(&ref->refcount, 1);
7537                 ref->buffer = iter->trace_buffer->buffer;
7538                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7539                 if (IS_ERR(ref->page)) {
7540                         ret = PTR_ERR(ref->page);
7541                         ref->page = NULL;
7542                         kfree(ref);
7543                         break;
7544                 }
7545                 ref->cpu = iter->cpu_file;
7546
7547                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7548                                           len, iter->cpu_file, 1);
7549                 if (r < 0) {
7550                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7551                                                    ref->page);
7552                         kfree(ref);
7553                         break;
7554                 }
7555
7556                 page = virt_to_page(ref->page);
7557
7558                 spd.pages[i] = page;
7559                 spd.partial[i].len = PAGE_SIZE;
7560                 spd.partial[i].offset = 0;
7561                 spd.partial[i].private = (unsigned long)ref;
7562                 spd.nr_pages++;
7563                 *ppos += PAGE_SIZE;
7564
7565                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7566         }
7567
7568         trace_access_unlock(iter->cpu_file);
7569         spd.nr_pages = i;
7570
7571         /* did we read anything? */
7572         if (!spd.nr_pages) {
7573                 if (ret)
7574                         goto out;
7575
7576                 ret = -EAGAIN;
7577                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7578                         goto out;
7579
7580                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7581                 if (ret)
7582                         goto out;
7583
7584                 goto again;
7585         }
7586
7587         ret = splice_to_pipe(pipe, &spd);
7588 out:
7589         splice_shrink_spd(&spd);
7590
7591         return ret;
7592 }
7593
7594 static const struct file_operations tracing_buffers_fops = {
7595         .open           = tracing_buffers_open,
7596         .read           = tracing_buffers_read,
7597         .poll           = tracing_buffers_poll,
7598         .release        = tracing_buffers_release,
7599         .splice_read    = tracing_buffers_splice_read,
7600         .llseek         = no_llseek,
7601 };
7602
7603 static ssize_t
7604 tracing_stats_read(struct file *filp, char __user *ubuf,
7605                    size_t count, loff_t *ppos)
7606 {
7607         struct inode *inode = file_inode(filp);
7608         struct trace_array *tr = inode->i_private;
7609         struct trace_buffer *trace_buf = &tr->trace_buffer;
7610         int cpu = tracing_get_cpu(inode);
7611         struct trace_seq *s;
7612         unsigned long cnt;
7613         unsigned long long t;
7614         unsigned long usec_rem;
7615
7616         s = kmalloc(sizeof(*s), GFP_KERNEL);
7617         if (!s)
7618                 return -ENOMEM;
7619
7620         trace_seq_init(s);
7621
7622         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7623         trace_seq_printf(s, "entries: %ld\n", cnt);
7624
7625         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7626         trace_seq_printf(s, "overrun: %ld\n", cnt);
7627
7628         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7629         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7630
7631         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7632         trace_seq_printf(s, "bytes: %ld\n", cnt);
7633
7634         if (trace_clocks[tr->clock_id].in_ns) {
7635                 /* local or global for trace_clock */
7636                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7637                 usec_rem = do_div(t, USEC_PER_SEC);
7638                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7639                                                                 t, usec_rem);
7640
7641                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7642                 usec_rem = do_div(t, USEC_PER_SEC);
7643                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7644         } else {
7645                 /* counter or tsc mode for trace_clock */
7646                 trace_seq_printf(s, "oldest event ts: %llu\n",
7647                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7648
7649                 trace_seq_printf(s, "now ts: %llu\n",
7650                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7651         }
7652
7653         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7654         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7655
7656         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7657         trace_seq_printf(s, "read events: %ld\n", cnt);
7658
7659         count = simple_read_from_buffer(ubuf, count, ppos,
7660                                         s->buffer, trace_seq_used(s));
7661
7662         kfree(s);
7663
7664         return count;
7665 }
7666
7667 static const struct file_operations tracing_stats_fops = {
7668         .open           = tracing_open_generic_tr,
7669         .read           = tracing_stats_read,
7670         .llseek         = generic_file_llseek,
7671         .release        = tracing_release_generic_tr,
7672 };
7673
7674 #ifdef CONFIG_DYNAMIC_FTRACE
7675
7676 static ssize_t
7677 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7678                   size_t cnt, loff_t *ppos)
7679 {
7680         ssize_t ret;
7681         char *buf;
7682         int r;
7683
7684         /* 256 should be plenty to hold the amount needed */
7685         buf = kmalloc(256, GFP_KERNEL);
7686         if (!buf)
7687                 return -ENOMEM;
7688
7689         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7690                       ftrace_update_tot_cnt,
7691                       ftrace_number_of_pages,
7692                       ftrace_number_of_groups);
7693
7694         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7695         kfree(buf);
7696         return ret;
7697 }
7698
7699 static const struct file_operations tracing_dyn_info_fops = {
7700         .open           = tracing_open_generic,
7701         .read           = tracing_read_dyn_info,
7702         .llseek         = generic_file_llseek,
7703 };
7704 #endif /* CONFIG_DYNAMIC_FTRACE */
7705
7706 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7707 static void
7708 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7709                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7710                 void *data)
7711 {
7712         tracing_snapshot_instance(tr);
7713 }
7714
7715 static void
7716 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7717                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7718                       void *data)
7719 {
7720         struct ftrace_func_mapper *mapper = data;
7721         long *count = NULL;
7722
7723         if (mapper)
7724                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7725
7726         if (count) {
7727
7728                 if (*count <= 0)
7729                         return;
7730
7731                 (*count)--;
7732         }
7733
7734         tracing_snapshot_instance(tr);
7735 }
7736
7737 static int
7738 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7739                       struct ftrace_probe_ops *ops, void *data)
7740 {
7741         struct ftrace_func_mapper *mapper = data;
7742         long *count = NULL;
7743
7744         seq_printf(m, "%ps:", (void *)ip);
7745
7746         seq_puts(m, "snapshot");
7747
7748         if (mapper)
7749                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7750
7751         if (count)
7752                 seq_printf(m, ":count=%ld\n", *count);
7753         else
7754                 seq_puts(m, ":unlimited\n");
7755
7756         return 0;
7757 }
7758
7759 static int
7760 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7761                      unsigned long ip, void *init_data, void **data)
7762 {
7763         struct ftrace_func_mapper *mapper = *data;
7764
7765         if (!mapper) {
7766                 mapper = allocate_ftrace_func_mapper();
7767                 if (!mapper)
7768                         return -ENOMEM;
7769                 *data = mapper;
7770         }
7771
7772         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7773 }
7774
7775 static void
7776 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7777                      unsigned long ip, void *data)
7778 {
7779         struct ftrace_func_mapper *mapper = data;
7780
7781         if (!ip) {
7782                 if (!mapper)
7783                         return;
7784                 free_ftrace_func_mapper(mapper, NULL);
7785                 return;
7786         }
7787
7788         ftrace_func_mapper_remove_ip(mapper, ip);
7789 }
7790
7791 static struct ftrace_probe_ops snapshot_probe_ops = {
7792         .func                   = ftrace_snapshot,
7793         .print                  = ftrace_snapshot_print,
7794 };
7795
7796 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7797         .func                   = ftrace_count_snapshot,
7798         .print                  = ftrace_snapshot_print,
7799         .init                   = ftrace_snapshot_init,
7800         .free                   = ftrace_snapshot_free,
7801 };
7802
7803 static int
7804 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7805                                char *glob, char *cmd, char *param, int enable)
7806 {
7807         struct ftrace_probe_ops *ops;
7808         void *count = (void *)-1;
7809         char *number;
7810         int ret;
7811
7812         if (!tr)
7813                 return -ENODEV;
7814
7815         /* hash funcs only work with set_ftrace_filter */
7816         if (!enable)
7817                 return -EINVAL;
7818
7819         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7820
7821         if (glob[0] == '!')
7822                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7823
7824         if (!param)
7825                 goto out_reg;
7826
7827         number = strsep(&param, ":");
7828
7829         if (!strlen(number))
7830                 goto out_reg;
7831
7832         /*
7833          * We use the callback data field (which is a pointer)
7834          * as our counter.
7835          */
7836         ret = kstrtoul(number, 0, (unsigned long *)&count);
7837         if (ret)
7838                 return ret;
7839
7840  out_reg:
7841         ret = tracing_alloc_snapshot_instance(tr);
7842         if (ret < 0)
7843                 goto out;
7844
7845         ret = register_ftrace_function_probe(glob, tr, ops, count);
7846
7847  out:
7848         return ret < 0 ? ret : 0;
7849 }
7850
7851 static struct ftrace_func_command ftrace_snapshot_cmd = {
7852         .name                   = "snapshot",
7853         .func                   = ftrace_trace_snapshot_callback,
7854 };
7855
7856 static __init int register_snapshot_cmd(void)
7857 {
7858         return register_ftrace_command(&ftrace_snapshot_cmd);
7859 }
7860 #else
7861 static inline __init int register_snapshot_cmd(void) { return 0; }
7862 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7863
7864 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7865 {
7866         if (WARN_ON(!tr->dir))
7867                 return ERR_PTR(-ENODEV);
7868
7869         /* Top directory uses NULL as the parent */
7870         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7871                 return NULL;
7872
7873         /* All sub buffers have a descriptor */
7874         return tr->dir;
7875 }
7876
7877 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7878 {
7879         struct dentry *d_tracer;
7880
7881         if (tr->percpu_dir)
7882                 return tr->percpu_dir;
7883
7884         d_tracer = tracing_get_dentry(tr);
7885         if (IS_ERR(d_tracer))
7886                 return NULL;
7887
7888         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7889
7890         WARN_ONCE(!tr->percpu_dir,
7891                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7892
7893         return tr->percpu_dir;
7894 }
7895
7896 static struct dentry *
7897 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7898                       void *data, long cpu, const struct file_operations *fops)
7899 {
7900         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7901
7902         if (ret) /* See tracing_get_cpu() */
7903                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7904         return ret;
7905 }
7906
7907 static void
7908 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7909 {
7910         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7911         struct dentry *d_cpu;
7912         char cpu_dir[30]; /* 30 characters should be more than enough */
7913
7914         if (!d_percpu)
7915                 return;
7916
7917         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7918         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7919         if (!d_cpu) {
7920                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7921                 return;
7922         }
7923
7924         /* per cpu trace_pipe */
7925         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7926                                 tr, cpu, &tracing_pipe_fops);
7927
7928         /* per cpu trace */
7929         trace_create_cpu_file("trace", 0644, d_cpu,
7930                                 tr, cpu, &tracing_fops);
7931
7932         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7933                                 tr, cpu, &tracing_buffers_fops);
7934
7935         trace_create_cpu_file("stats", 0444, d_cpu,
7936                                 tr, cpu, &tracing_stats_fops);
7937
7938         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7939                                 tr, cpu, &tracing_entries_fops);
7940
7941 #ifdef CONFIG_TRACER_SNAPSHOT
7942         trace_create_cpu_file("snapshot", 0644, d_cpu,
7943                                 tr, cpu, &snapshot_fops);
7944
7945         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7946                                 tr, cpu, &snapshot_raw_fops);
7947 #endif
7948 }
7949
7950 #ifdef CONFIG_FTRACE_SELFTEST
7951 /* Let selftest have access to static functions in this file */
7952 #include "trace_selftest.c"
7953 #endif
7954
7955 static ssize_t
7956 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7957                         loff_t *ppos)
7958 {
7959         struct trace_option_dentry *topt = filp->private_data;
7960         char *buf;
7961
7962         if (topt->flags->val & topt->opt->bit)
7963                 buf = "1\n";
7964         else
7965                 buf = "0\n";
7966
7967         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7968 }
7969
7970 static ssize_t
7971 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7972                          loff_t *ppos)
7973 {
7974         struct trace_option_dentry *topt = filp->private_data;
7975         unsigned long val;
7976         int ret;
7977
7978         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7979         if (ret)
7980                 return ret;
7981
7982         if (val != 0 && val != 1)
7983                 return -EINVAL;
7984
7985         if (!!(topt->flags->val & topt->opt->bit) != val) {
7986                 mutex_lock(&trace_types_lock);
7987                 ret = __set_tracer_option(topt->tr, topt->flags,
7988                                           topt->opt, !val);
7989                 mutex_unlock(&trace_types_lock);
7990                 if (ret)
7991                         return ret;
7992         }
7993
7994         *ppos += cnt;
7995
7996         return cnt;
7997 }
7998
7999 static int tracing_open_options(struct inode *inode, struct file *filp)
8000 {
8001         struct trace_option_dentry *topt = inode->i_private;
8002         int ret;
8003
8004         ret = tracing_check_open_get_tr(topt->tr);
8005         if (ret)
8006                 return ret;
8007
8008         filp->private_data = inode->i_private;
8009         return 0;
8010 }
8011
8012 static int tracing_release_options(struct inode *inode, struct file *file)
8013 {
8014         struct trace_option_dentry *topt = file->private_data;
8015
8016         trace_array_put(topt->tr);
8017         return 0;
8018 }
8019
8020 static const struct file_operations trace_options_fops = {
8021         .open = tracing_open_options,
8022         .read = trace_options_read,
8023         .write = trace_options_write,
8024         .llseek = generic_file_llseek,
8025         .release = tracing_release_options,
8026 };
8027
8028 /*
8029  * In order to pass in both the trace_array descriptor as well as the index
8030  * to the flag that the trace option file represents, the trace_array
8031  * has a character array of trace_flags_index[], which holds the index
8032  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8033  * The address of this character array is passed to the flag option file
8034  * read/write callbacks.
8035  *
8036  * In order to extract both the index and the trace_array descriptor,
8037  * get_tr_index() uses the following algorithm.
8038  *
8039  *   idx = *ptr;
8040  *
8041  * As the pointer itself contains the address of the index (remember
8042  * index[1] == 1).
8043  *
8044  * Then to get the trace_array descriptor, by subtracting that index
8045  * from the ptr, we get to the start of the index itself.
8046  *
8047  *   ptr - idx == &index[0]
8048  *
8049  * Then a simple container_of() from that pointer gets us to the
8050  * trace_array descriptor.
8051  */
8052 static void get_tr_index(void *data, struct trace_array **ptr,
8053                          unsigned int *pindex)
8054 {
8055         *pindex = *(unsigned char *)data;
8056
8057         *ptr = container_of(data - *pindex, struct trace_array,
8058                             trace_flags_index);
8059 }
8060
8061 static ssize_t
8062 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8063                         loff_t *ppos)
8064 {
8065         void *tr_index = filp->private_data;
8066         struct trace_array *tr;
8067         unsigned int index;
8068         char *buf;
8069
8070         get_tr_index(tr_index, &tr, &index);
8071
8072         if (tr->trace_flags & (1 << index))
8073                 buf = "1\n";
8074         else
8075                 buf = "0\n";
8076
8077         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8078 }
8079
8080 static ssize_t
8081 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8082                          loff_t *ppos)
8083 {
8084         void *tr_index = filp->private_data;
8085         struct trace_array *tr;
8086         unsigned int index;
8087         unsigned long val;
8088         int ret;
8089
8090         get_tr_index(tr_index, &tr, &index);
8091
8092         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8093         if (ret)
8094                 return ret;
8095
8096         if (val != 0 && val != 1)
8097                 return -EINVAL;
8098
8099         mutex_lock(&event_mutex);
8100         mutex_lock(&trace_types_lock);
8101         ret = set_tracer_flag(tr, 1 << index, val);
8102         mutex_unlock(&trace_types_lock);
8103         mutex_unlock(&event_mutex);
8104
8105         if (ret < 0)
8106                 return ret;
8107
8108         *ppos += cnt;
8109
8110         return cnt;
8111 }
8112
8113 static const struct file_operations trace_options_core_fops = {
8114         .open = tracing_open_generic,
8115         .read = trace_options_core_read,
8116         .write = trace_options_core_write,
8117         .llseek = generic_file_llseek,
8118 };
8119
8120 struct dentry *trace_create_file(const char *name,
8121                                  umode_t mode,
8122                                  struct dentry *parent,
8123                                  void *data,
8124                                  const struct file_operations *fops)
8125 {
8126         struct dentry *ret;
8127
8128         ret = tracefs_create_file(name, mode, parent, data, fops);
8129         if (!ret)
8130                 pr_warn("Could not create tracefs '%s' entry\n", name);
8131
8132         return ret;
8133 }
8134
8135
8136 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8137 {
8138         struct dentry *d_tracer;
8139
8140         if (tr->options)
8141                 return tr->options;
8142
8143         d_tracer = tracing_get_dentry(tr);
8144         if (IS_ERR(d_tracer))
8145                 return NULL;
8146
8147         tr->options = tracefs_create_dir("options", d_tracer);
8148         if (!tr->options) {
8149                 pr_warn("Could not create tracefs directory 'options'\n");
8150                 return NULL;
8151         }
8152
8153         return tr->options;
8154 }
8155
8156 static void
8157 create_trace_option_file(struct trace_array *tr,
8158                          struct trace_option_dentry *topt,
8159                          struct tracer_flags *flags,
8160                          struct tracer_opt *opt)
8161 {
8162         struct dentry *t_options;
8163
8164         t_options = trace_options_init_dentry(tr);
8165         if (!t_options)
8166                 return;
8167
8168         topt->flags = flags;
8169         topt->opt = opt;
8170         topt->tr = tr;
8171
8172         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8173                                     &trace_options_fops);
8174
8175 }
8176
8177 static void
8178 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8179 {
8180         struct trace_option_dentry *topts;
8181         struct trace_options *tr_topts;
8182         struct tracer_flags *flags;
8183         struct tracer_opt *opts;
8184         int cnt;
8185         int i;
8186
8187         if (!tracer)
8188                 return;
8189
8190         flags = tracer->flags;
8191
8192         if (!flags || !flags->opts)
8193                 return;
8194
8195         /*
8196          * If this is an instance, only create flags for tracers
8197          * the instance may have.
8198          */
8199         if (!trace_ok_for_array(tracer, tr))
8200                 return;
8201
8202         for (i = 0; i < tr->nr_topts; i++) {
8203                 /* Make sure there's no duplicate flags. */
8204                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8205                         return;
8206         }
8207
8208         opts = flags->opts;
8209
8210         for (cnt = 0; opts[cnt].name; cnt++)
8211                 ;
8212
8213         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8214         if (!topts)
8215                 return;
8216
8217         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8218                             GFP_KERNEL);
8219         if (!tr_topts) {
8220                 kfree(topts);
8221                 return;
8222         }
8223
8224         tr->topts = tr_topts;
8225         tr->topts[tr->nr_topts].tracer = tracer;
8226         tr->topts[tr->nr_topts].topts = topts;
8227         tr->nr_topts++;
8228
8229         for (cnt = 0; opts[cnt].name; cnt++) {
8230                 create_trace_option_file(tr, &topts[cnt], flags,
8231                                          &opts[cnt]);
8232                 WARN_ONCE(topts[cnt].entry == NULL,
8233                           "Failed to create trace option: %s",
8234                           opts[cnt].name);
8235         }
8236 }
8237
8238 static struct dentry *
8239 create_trace_option_core_file(struct trace_array *tr,
8240                               const char *option, long index)
8241 {
8242         struct dentry *t_options;
8243
8244         t_options = trace_options_init_dentry(tr);
8245         if (!t_options)
8246                 return NULL;
8247
8248         return trace_create_file(option, 0644, t_options,
8249                                  (void *)&tr->trace_flags_index[index],
8250                                  &trace_options_core_fops);
8251 }
8252
8253 static void create_trace_options_dir(struct trace_array *tr)
8254 {
8255         struct dentry *t_options;
8256         bool top_level = tr == &global_trace;
8257         int i;
8258
8259         t_options = trace_options_init_dentry(tr);
8260         if (!t_options)
8261                 return;
8262
8263         for (i = 0; trace_options[i]; i++) {
8264                 if (top_level ||
8265                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8266                         create_trace_option_core_file(tr, trace_options[i], i);
8267         }
8268 }
8269
8270 static ssize_t
8271 rb_simple_read(struct file *filp, char __user *ubuf,
8272                size_t cnt, loff_t *ppos)
8273 {
8274         struct trace_array *tr = filp->private_data;
8275         char buf[64];
8276         int r;
8277
8278         r = tracer_tracing_is_on(tr);
8279         r = sprintf(buf, "%d\n", r);
8280
8281         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8282 }
8283
8284 static ssize_t
8285 rb_simple_write(struct file *filp, const char __user *ubuf,
8286                 size_t cnt, loff_t *ppos)
8287 {
8288         struct trace_array *tr = filp->private_data;
8289         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8290         unsigned long val;
8291         int ret;
8292
8293         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8294         if (ret)
8295                 return ret;
8296
8297         if (buffer) {
8298                 mutex_lock(&trace_types_lock);
8299                 if (!!val == tracer_tracing_is_on(tr)) {
8300                         val = 0; /* do nothing */
8301                 } else if (val) {
8302                         tracer_tracing_on(tr);
8303                         if (tr->current_trace->start)
8304                                 tr->current_trace->start(tr);
8305                 } else {
8306                         tracer_tracing_off(tr);
8307                         if (tr->current_trace->stop)
8308                                 tr->current_trace->stop(tr);
8309                 }
8310                 mutex_unlock(&trace_types_lock);
8311         }
8312
8313         (*ppos)++;
8314
8315         return cnt;
8316 }
8317
8318 static const struct file_operations rb_simple_fops = {
8319         .open           = tracing_open_generic_tr,
8320         .read           = rb_simple_read,
8321         .write          = rb_simple_write,
8322         .release        = tracing_release_generic_tr,
8323         .llseek         = default_llseek,
8324 };
8325
8326 static ssize_t
8327 buffer_percent_read(struct file *filp, char __user *ubuf,
8328                     size_t cnt, loff_t *ppos)
8329 {
8330         struct trace_array *tr = filp->private_data;
8331         char buf[64];
8332         int r;
8333
8334         r = tr->buffer_percent;
8335         r = sprintf(buf, "%d\n", r);
8336
8337         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8338 }
8339
8340 static ssize_t
8341 buffer_percent_write(struct file *filp, const char __user *ubuf,
8342                      size_t cnt, loff_t *ppos)
8343 {
8344         struct trace_array *tr = filp->private_data;
8345         unsigned long val;
8346         int ret;
8347
8348         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8349         if (ret)
8350                 return ret;
8351
8352         if (val > 100)
8353                 return -EINVAL;
8354
8355         tr->buffer_percent = val;
8356
8357         (*ppos)++;
8358
8359         return cnt;
8360 }
8361
8362 static const struct file_operations buffer_percent_fops = {
8363         .open           = tracing_open_generic_tr,
8364         .read           = buffer_percent_read,
8365         .write          = buffer_percent_write,
8366         .release        = tracing_release_generic_tr,
8367         .llseek         = default_llseek,
8368 };
8369
8370 static struct dentry *trace_instance_dir;
8371
8372 static void
8373 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8374
8375 static int
8376 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8377 {
8378         enum ring_buffer_flags rb_flags;
8379
8380         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8381
8382         buf->tr = tr;
8383
8384         buf->buffer = ring_buffer_alloc(size, rb_flags);
8385         if (!buf->buffer)
8386                 return -ENOMEM;
8387
8388         buf->data = alloc_percpu(struct trace_array_cpu);
8389         if (!buf->data) {
8390                 ring_buffer_free(buf->buffer);
8391                 buf->buffer = NULL;
8392                 return -ENOMEM;
8393         }
8394
8395         /* Allocate the first page for all buffers */
8396         set_buffer_entries(&tr->trace_buffer,
8397                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8398
8399         return 0;
8400 }
8401
8402 static int allocate_trace_buffers(struct trace_array *tr, int size)
8403 {
8404         int ret;
8405
8406         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8407         if (ret)
8408                 return ret;
8409
8410 #ifdef CONFIG_TRACER_MAX_TRACE
8411         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8412                                     allocate_snapshot ? size : 1);
8413         if (WARN_ON(ret)) {
8414                 ring_buffer_free(tr->trace_buffer.buffer);
8415                 tr->trace_buffer.buffer = NULL;
8416                 free_percpu(tr->trace_buffer.data);
8417                 tr->trace_buffer.data = NULL;
8418                 return -ENOMEM;
8419         }
8420         tr->allocated_snapshot = allocate_snapshot;
8421
8422         /*
8423          * Only the top level trace array gets its snapshot allocated
8424          * from the kernel command line.
8425          */
8426         allocate_snapshot = false;
8427 #endif
8428
8429         /*
8430          * Because of some magic with the way alloc_percpu() works on
8431          * x86_64, we need to synchronize the pgd of all the tables,
8432          * otherwise the trace events that happen in x86_64 page fault
8433          * handlers can't cope with accessing the chance that a
8434          * alloc_percpu()'d memory might be touched in the page fault trace
8435          * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
8436          * calls in tracing, because something might get triggered within a
8437          * page fault trace event!
8438          */
8439         vmalloc_sync_mappings();
8440
8441         return 0;
8442 }
8443
8444 static void free_trace_buffer(struct trace_buffer *buf)
8445 {
8446         if (buf->buffer) {
8447                 ring_buffer_free(buf->buffer);
8448                 buf->buffer = NULL;
8449                 free_percpu(buf->data);
8450                 buf->data = NULL;
8451         }
8452 }
8453
8454 static void free_trace_buffers(struct trace_array *tr)
8455 {
8456         if (!tr)
8457                 return;
8458
8459         free_trace_buffer(&tr->trace_buffer);
8460
8461 #ifdef CONFIG_TRACER_MAX_TRACE
8462         free_trace_buffer(&tr->max_buffer);
8463 #endif
8464 }
8465
8466 static void init_trace_flags_index(struct trace_array *tr)
8467 {
8468         int i;
8469
8470         /* Used by the trace options files */
8471         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8472                 tr->trace_flags_index[i] = i;
8473 }
8474
8475 static void __update_tracer_options(struct trace_array *tr)
8476 {
8477         struct tracer *t;
8478
8479         for (t = trace_types; t; t = t->next)
8480                 add_tracer_options(tr, t);
8481 }
8482
8483 static void update_tracer_options(struct trace_array *tr)
8484 {
8485         mutex_lock(&trace_types_lock);
8486         tracer_options_updated = true;
8487         __update_tracer_options(tr);
8488         mutex_unlock(&trace_types_lock);
8489 }
8490
8491 struct trace_array *trace_array_create(const char *name)
8492 {
8493         struct trace_array *tr;
8494         int ret;
8495
8496         mutex_lock(&event_mutex);
8497         mutex_lock(&trace_types_lock);
8498
8499         ret = -EEXIST;
8500         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8501                 if (tr->name && strcmp(tr->name, name) == 0)
8502                         goto out_unlock;
8503         }
8504
8505         ret = -ENOMEM;
8506         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8507         if (!tr)
8508                 goto out_unlock;
8509
8510         tr->name = kstrdup(name, GFP_KERNEL);
8511         if (!tr->name)
8512                 goto out_free_tr;
8513
8514         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8515                 goto out_free_tr;
8516
8517         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8518
8519         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8520
8521         raw_spin_lock_init(&tr->start_lock);
8522
8523         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8524
8525         tr->current_trace = &nop_trace;
8526
8527         INIT_LIST_HEAD(&tr->systems);
8528         INIT_LIST_HEAD(&tr->events);
8529         INIT_LIST_HEAD(&tr->hist_vars);
8530         INIT_LIST_HEAD(&tr->err_log);
8531
8532         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8533                 goto out_free_tr;
8534
8535         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8536         if (!tr->dir)
8537                 goto out_free_tr;
8538
8539         ret = event_trace_add_tracer(tr->dir, tr);
8540         if (ret) {
8541                 tracefs_remove_recursive(tr->dir);
8542                 goto out_free_tr;
8543         }
8544
8545         ftrace_init_trace_array(tr);
8546
8547         init_tracer_tracefs(tr, tr->dir);
8548         init_trace_flags_index(tr);
8549         __update_tracer_options(tr);
8550
8551         list_add(&tr->list, &ftrace_trace_arrays);
8552
8553         mutex_unlock(&trace_types_lock);
8554         mutex_unlock(&event_mutex);
8555
8556         return tr;
8557
8558  out_free_tr:
8559         free_trace_buffers(tr);
8560         free_cpumask_var(tr->tracing_cpumask);
8561         kfree(tr->name);
8562         kfree(tr);
8563
8564  out_unlock:
8565         mutex_unlock(&trace_types_lock);
8566         mutex_unlock(&event_mutex);
8567
8568         return ERR_PTR(ret);
8569 }
8570 EXPORT_SYMBOL_GPL(trace_array_create);
8571
8572 static int instance_mkdir(const char *name)
8573 {
8574         return PTR_ERR_OR_ZERO(trace_array_create(name));
8575 }
8576
8577 static int __remove_instance(struct trace_array *tr)
8578 {
8579         int i;
8580
8581         if (tr->ref || (tr->current_trace && tr->trace_ref))
8582                 return -EBUSY;
8583
8584         list_del(&tr->list);
8585
8586         /* Disable all the flags that were enabled coming in */
8587         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8588                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8589                         set_tracer_flag(tr, 1 << i, 0);
8590         }
8591
8592         tracing_set_nop(tr);
8593         clear_ftrace_function_probes(tr);
8594         event_trace_del_tracer(tr);
8595         ftrace_clear_pids(tr);
8596         ftrace_destroy_function_files(tr);
8597         tracefs_remove_recursive(tr->dir);
8598         free_trace_buffers(tr);
8599         clear_tracing_err_log(tr);
8600
8601         for (i = 0; i < tr->nr_topts; i++) {
8602                 kfree(tr->topts[i].topts);
8603         }
8604         kfree(tr->topts);
8605
8606         free_cpumask_var(tr->tracing_cpumask);
8607         kfree(tr->name);
8608         kfree(tr);
8609         tr = NULL;
8610
8611         return 0;
8612 }
8613
8614 int trace_array_destroy(struct trace_array *this_tr)
8615 {
8616         struct trace_array *tr;
8617         int ret;
8618
8619         if (!this_tr)
8620                 return -EINVAL;
8621
8622         mutex_lock(&event_mutex);
8623         mutex_lock(&trace_types_lock);
8624
8625         ret = -ENODEV;
8626
8627         /* Making sure trace array exists before destroying it. */
8628         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8629                 if (tr == this_tr) {
8630                         ret = __remove_instance(tr);
8631                         break;
8632                 }
8633         }
8634
8635         mutex_unlock(&trace_types_lock);
8636         mutex_unlock(&event_mutex);
8637
8638         return ret;
8639 }
8640 EXPORT_SYMBOL_GPL(trace_array_destroy);
8641
8642 static int instance_rmdir(const char *name)
8643 {
8644         struct trace_array *tr;
8645         int ret;
8646
8647         mutex_lock(&event_mutex);
8648         mutex_lock(&trace_types_lock);
8649
8650         ret = -ENODEV;
8651         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8652                 if (tr->name && strcmp(tr->name, name) == 0) {
8653                         ret = __remove_instance(tr);
8654                         break;
8655                 }
8656         }
8657
8658         mutex_unlock(&trace_types_lock);
8659         mutex_unlock(&event_mutex);
8660
8661         return ret;
8662 }
8663
8664 static __init void create_trace_instances(struct dentry *d_tracer)
8665 {
8666         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8667                                                          instance_mkdir,
8668                                                          instance_rmdir);
8669         if (WARN_ON(!trace_instance_dir))
8670                 return;
8671 }
8672
8673 static void
8674 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8675 {
8676         struct trace_event_file *file;
8677         int cpu;
8678
8679         trace_create_file("available_tracers", 0444, d_tracer,
8680                         tr, &show_traces_fops);
8681
8682         trace_create_file("current_tracer", 0644, d_tracer,
8683                         tr, &set_tracer_fops);
8684
8685         trace_create_file("tracing_cpumask", 0644, d_tracer,
8686                           tr, &tracing_cpumask_fops);
8687
8688         trace_create_file("trace_options", 0644, d_tracer,
8689                           tr, &tracing_iter_fops);
8690
8691         trace_create_file("trace", 0644, d_tracer,
8692                           tr, &tracing_fops);
8693
8694         trace_create_file("trace_pipe", 0444, d_tracer,
8695                           tr, &tracing_pipe_fops);
8696
8697         trace_create_file("buffer_size_kb", 0644, d_tracer,
8698                           tr, &tracing_entries_fops);
8699
8700         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8701                           tr, &tracing_total_entries_fops);
8702
8703         trace_create_file("free_buffer", 0200, d_tracer,
8704                           tr, &tracing_free_buffer_fops);
8705
8706         trace_create_file("trace_marker", 0220, d_tracer,
8707                           tr, &tracing_mark_fops);
8708
8709         file = __find_event_file(tr, "ftrace", "print");
8710         if (file && file->dir)
8711                 trace_create_file("trigger", 0644, file->dir, file,
8712                                   &event_trigger_fops);
8713         tr->trace_marker_file = file;
8714
8715         trace_create_file("trace_marker_raw", 0220, d_tracer,
8716                           tr, &tracing_mark_raw_fops);
8717
8718         trace_create_file("trace_clock", 0644, d_tracer, tr,
8719                           &trace_clock_fops);
8720
8721         trace_create_file("tracing_on", 0644, d_tracer,
8722                           tr, &rb_simple_fops);
8723
8724         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8725                           &trace_time_stamp_mode_fops);
8726
8727         tr->buffer_percent = 50;
8728
8729         trace_create_file("buffer_percent", 0444, d_tracer,
8730                         tr, &buffer_percent_fops);
8731
8732         create_trace_options_dir(tr);
8733
8734 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8735         trace_create_file("tracing_max_latency", 0644, d_tracer,
8736                         &tr->max_latency, &tracing_max_lat_fops);
8737 #endif
8738
8739         if (ftrace_create_function_files(tr, d_tracer))
8740                 WARN(1, "Could not allocate function filter files");
8741
8742 #ifdef CONFIG_TRACER_SNAPSHOT
8743         trace_create_file("snapshot", 0644, d_tracer,
8744                           tr, &snapshot_fops);
8745 #endif
8746
8747         trace_create_file("error_log", 0644, d_tracer,
8748                           tr, &tracing_err_log_fops);
8749
8750         for_each_tracing_cpu(cpu)
8751                 tracing_init_tracefs_percpu(tr, cpu);
8752
8753         ftrace_init_tracefs(tr, d_tracer);
8754 }
8755
8756 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8757 {
8758         struct vfsmount *mnt;
8759         struct file_system_type *type;
8760
8761         /*
8762          * To maintain backward compatibility for tools that mount
8763          * debugfs to get to the tracing facility, tracefs is automatically
8764          * mounted to the debugfs/tracing directory.
8765          */
8766         type = get_fs_type("tracefs");
8767         if (!type)
8768                 return NULL;
8769         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8770         put_filesystem(type);
8771         if (IS_ERR(mnt))
8772                 return NULL;
8773         mntget(mnt);
8774
8775         return mnt;
8776 }
8777
8778 /**
8779  * tracing_init_dentry - initialize top level trace array
8780  *
8781  * This is called when creating files or directories in the tracing
8782  * directory. It is called via fs_initcall() by any of the boot up code
8783  * and expects to return the dentry of the top level tracing directory.
8784  */
8785 struct dentry *tracing_init_dentry(void)
8786 {
8787         struct trace_array *tr = &global_trace;
8788
8789         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8790                 pr_warning("Tracing disabled due to lockdown\n");
8791                 return ERR_PTR(-EPERM);
8792         }
8793
8794         /* The top level trace array uses  NULL as parent */
8795         if (tr->dir)
8796                 return NULL;
8797
8798         if (WARN_ON(!tracefs_initialized()) ||
8799                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8800                  WARN_ON(!debugfs_initialized())))
8801                 return ERR_PTR(-ENODEV);
8802
8803         /*
8804          * As there may still be users that expect the tracing
8805          * files to exist in debugfs/tracing, we must automount
8806          * the tracefs file system there, so older tools still
8807          * work with the newer kerenl.
8808          */
8809         tr->dir = debugfs_create_automount("tracing", NULL,
8810                                            trace_automount, NULL);
8811
8812         return NULL;
8813 }
8814
8815 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8816 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8817
8818 static void __init trace_eval_init(void)
8819 {
8820         int len;
8821
8822         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8823         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8824 }
8825
8826 #ifdef CONFIG_MODULES
8827 static void trace_module_add_evals(struct module *mod)
8828 {
8829         if (!mod->num_trace_evals)
8830                 return;
8831
8832         /*
8833          * Modules with bad taint do not have events created, do
8834          * not bother with enums either.
8835          */
8836         if (trace_module_has_bad_taint(mod))
8837                 return;
8838
8839         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8840 }
8841
8842 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8843 static void trace_module_remove_evals(struct module *mod)
8844 {
8845         union trace_eval_map_item *map;
8846         union trace_eval_map_item **last = &trace_eval_maps;
8847
8848         if (!mod->num_trace_evals)
8849                 return;
8850
8851         mutex_lock(&trace_eval_mutex);
8852
8853         map = trace_eval_maps;
8854
8855         while (map) {
8856                 if (map->head.mod == mod)
8857                         break;
8858                 map = trace_eval_jmp_to_tail(map);
8859                 last = &map->tail.next;
8860                 map = map->tail.next;
8861         }
8862         if (!map)
8863                 goto out;
8864
8865         *last = trace_eval_jmp_to_tail(map)->tail.next;
8866         kfree(map);
8867  out:
8868         mutex_unlock(&trace_eval_mutex);
8869 }
8870 #else
8871 static inline void trace_module_remove_evals(struct module *mod) { }
8872 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8873
8874 static int trace_module_notify(struct notifier_block *self,
8875                                unsigned long val, void *data)
8876 {
8877         struct module *mod = data;
8878
8879         switch (val) {
8880         case MODULE_STATE_COMING:
8881                 trace_module_add_evals(mod);
8882                 break;
8883         case MODULE_STATE_GOING:
8884                 trace_module_remove_evals(mod);
8885                 break;
8886         }
8887
8888         return 0;
8889 }
8890
8891 static struct notifier_block trace_module_nb = {
8892         .notifier_call = trace_module_notify,
8893         .priority = 0,
8894 };
8895 #endif /* CONFIG_MODULES */
8896
8897 static __init int tracer_init_tracefs(void)
8898 {
8899         struct dentry *d_tracer;
8900
8901         trace_access_lock_init();
8902
8903         d_tracer = tracing_init_dentry();
8904         if (IS_ERR(d_tracer))
8905                 return 0;
8906
8907         event_trace_init();
8908
8909         init_tracer_tracefs(&global_trace, d_tracer);
8910         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8911
8912         trace_create_file("tracing_thresh", 0644, d_tracer,
8913                         &global_trace, &tracing_thresh_fops);
8914
8915         trace_create_file("README", 0444, d_tracer,
8916                         NULL, &tracing_readme_fops);
8917
8918         trace_create_file("saved_cmdlines", 0444, d_tracer,
8919                         NULL, &tracing_saved_cmdlines_fops);
8920
8921         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8922                           NULL, &tracing_saved_cmdlines_size_fops);
8923
8924         trace_create_file("saved_tgids", 0444, d_tracer,
8925                         NULL, &tracing_saved_tgids_fops);
8926
8927         trace_eval_init();
8928
8929         trace_create_eval_file(d_tracer);
8930
8931 #ifdef CONFIG_MODULES
8932         register_module_notifier(&trace_module_nb);
8933 #endif
8934
8935 #ifdef CONFIG_DYNAMIC_FTRACE
8936         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8937                         NULL, &tracing_dyn_info_fops);
8938 #endif
8939
8940         create_trace_instances(d_tracer);
8941
8942         update_tracer_options(&global_trace);
8943
8944         return 0;
8945 }
8946
8947 static int trace_panic_handler(struct notifier_block *this,
8948                                unsigned long event, void *unused)
8949 {
8950         if (ftrace_dump_on_oops)
8951                 ftrace_dump(ftrace_dump_on_oops);
8952         return NOTIFY_OK;
8953 }
8954
8955 static struct notifier_block trace_panic_notifier = {
8956         .notifier_call  = trace_panic_handler,
8957         .next           = NULL,
8958         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8959 };
8960
8961 static int trace_die_handler(struct notifier_block *self,
8962                              unsigned long val,
8963                              void *data)
8964 {
8965         switch (val) {
8966         case DIE_OOPS:
8967                 if (ftrace_dump_on_oops)
8968                         ftrace_dump(ftrace_dump_on_oops);
8969                 break;
8970         default:
8971                 break;
8972         }
8973         return NOTIFY_OK;
8974 }
8975
8976 static struct notifier_block trace_die_notifier = {
8977         .notifier_call = trace_die_handler,
8978         .priority = 200
8979 };
8980
8981 /*
8982  * printk is set to max of 1024, we really don't need it that big.
8983  * Nothing should be printing 1000 characters anyway.
8984  */
8985 #define TRACE_MAX_PRINT         1000
8986
8987 /*
8988  * Define here KERN_TRACE so that we have one place to modify
8989  * it if we decide to change what log level the ftrace dump
8990  * should be at.
8991  */
8992 #define KERN_TRACE              KERN_EMERG
8993
8994 void
8995 trace_printk_seq(struct trace_seq *s)
8996 {
8997         /* Probably should print a warning here. */
8998         if (s->seq.len >= TRACE_MAX_PRINT)
8999                 s->seq.len = TRACE_MAX_PRINT;
9000
9001         /*
9002          * More paranoid code. Although the buffer size is set to
9003          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9004          * an extra layer of protection.
9005          */
9006         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9007                 s->seq.len = s->seq.size - 1;
9008
9009         /* should be zero ended, but we are paranoid. */
9010         s->buffer[s->seq.len] = 0;
9011
9012         printk(KERN_TRACE "%s", s->buffer);
9013
9014         trace_seq_init(s);
9015 }
9016
9017 void trace_init_global_iter(struct trace_iterator *iter)
9018 {
9019         iter->tr = &global_trace;
9020         iter->trace = iter->tr->current_trace;
9021         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9022         iter->trace_buffer = &global_trace.trace_buffer;
9023
9024         if (iter->trace && iter->trace->open)
9025                 iter->trace->open(iter);
9026
9027         /* Annotate start of buffers if we had overruns */
9028         if (ring_buffer_overruns(iter->trace_buffer->buffer))
9029                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9030
9031         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9032         if (trace_clocks[iter->tr->clock_id].in_ns)
9033                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9034 }
9035
9036 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9037 {
9038         /* use static because iter can be a bit big for the stack */
9039         static struct trace_iterator iter;
9040         static atomic_t dump_running;
9041         struct trace_array *tr = &global_trace;
9042         unsigned int old_userobj;
9043         unsigned long flags;
9044         int cnt = 0, cpu;
9045
9046         /* Only allow one dump user at a time. */
9047         if (atomic_inc_return(&dump_running) != 1) {
9048                 atomic_dec(&dump_running);
9049                 return;
9050         }
9051
9052         /*
9053          * Always turn off tracing when we dump.
9054          * We don't need to show trace output of what happens
9055          * between multiple crashes.
9056          *
9057          * If the user does a sysrq-z, then they can re-enable
9058          * tracing with echo 1 > tracing_on.
9059          */
9060         tracing_off();
9061
9062         local_irq_save(flags);
9063         printk_nmi_direct_enter();
9064
9065         /* Simulate the iterator */
9066         trace_init_global_iter(&iter);
9067
9068         for_each_tracing_cpu(cpu) {
9069                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9070         }
9071
9072         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9073
9074         /* don't look at user memory in panic mode */
9075         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9076
9077         switch (oops_dump_mode) {
9078         case DUMP_ALL:
9079                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9080                 break;
9081         case DUMP_ORIG:
9082                 iter.cpu_file = raw_smp_processor_id();
9083                 break;
9084         case DUMP_NONE:
9085                 goto out_enable;
9086         default:
9087                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9088                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9089         }
9090
9091         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9092
9093         /* Did function tracer already get disabled? */
9094         if (ftrace_is_dead()) {
9095                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9096                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9097         }
9098
9099         /*
9100          * We need to stop all tracing on all CPUS to read the
9101          * the next buffer. This is a bit expensive, but is
9102          * not done often. We fill all what we can read,
9103          * and then release the locks again.
9104          */
9105
9106         while (!trace_empty(&iter)) {
9107
9108                 if (!cnt)
9109                         printk(KERN_TRACE "---------------------------------\n");
9110
9111                 cnt++;
9112
9113                 trace_iterator_reset(&iter);
9114                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9115
9116                 if (trace_find_next_entry_inc(&iter) != NULL) {
9117                         int ret;
9118
9119                         ret = print_trace_line(&iter);
9120                         if (ret != TRACE_TYPE_NO_CONSUME)
9121                                 trace_consume(&iter);
9122                 }
9123                 touch_nmi_watchdog();
9124
9125                 trace_printk_seq(&iter.seq);
9126         }
9127
9128         if (!cnt)
9129                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9130         else
9131                 printk(KERN_TRACE "---------------------------------\n");
9132
9133  out_enable:
9134         tr->trace_flags |= old_userobj;
9135
9136         for_each_tracing_cpu(cpu) {
9137                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9138         }
9139         atomic_dec(&dump_running);
9140         printk_nmi_direct_exit();
9141         local_irq_restore(flags);
9142 }
9143 EXPORT_SYMBOL_GPL(ftrace_dump);
9144
9145 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9146 {
9147         char **argv;
9148         int argc, ret;
9149
9150         argc = 0;
9151         ret = 0;
9152         argv = argv_split(GFP_KERNEL, buf, &argc);
9153         if (!argv)
9154                 return -ENOMEM;
9155
9156         if (argc)
9157                 ret = createfn(argc, argv);
9158
9159         argv_free(argv);
9160
9161         return ret;
9162 }
9163
9164 #define WRITE_BUFSIZE  4096
9165
9166 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9167                                 size_t count, loff_t *ppos,
9168                                 int (*createfn)(int, char **))
9169 {
9170         char *kbuf, *buf, *tmp;
9171         int ret = 0;
9172         size_t done = 0;
9173         size_t size;
9174
9175         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9176         if (!kbuf)
9177                 return -ENOMEM;
9178
9179         while (done < count) {
9180                 size = count - done;
9181
9182                 if (size >= WRITE_BUFSIZE)
9183                         size = WRITE_BUFSIZE - 1;
9184
9185                 if (copy_from_user(kbuf, buffer + done, size)) {
9186                         ret = -EFAULT;
9187                         goto out;
9188                 }
9189                 kbuf[size] = '\0';
9190                 buf = kbuf;
9191                 do {
9192                         tmp = strchr(buf, '\n');
9193                         if (tmp) {
9194                                 *tmp = '\0';
9195                                 size = tmp - buf + 1;
9196                         } else {
9197                                 size = strlen(buf);
9198                                 if (done + size < count) {
9199                                         if (buf != kbuf)
9200                                                 break;
9201                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9202                                         pr_warn("Line length is too long: Should be less than %d\n",
9203                                                 WRITE_BUFSIZE - 2);
9204                                         ret = -EINVAL;
9205                                         goto out;
9206                                 }
9207                         }
9208                         done += size;
9209
9210                         /* Remove comments */
9211                         tmp = strchr(buf, '#');
9212
9213                         if (tmp)
9214                                 *tmp = '\0';
9215
9216                         ret = trace_run_command(buf, createfn);
9217                         if (ret)
9218                                 goto out;
9219                         buf += size;
9220
9221                 } while (done < count);
9222         }
9223         ret = done;
9224
9225 out:
9226         kfree(kbuf);
9227
9228         return ret;
9229 }
9230
9231 __init static int tracer_alloc_buffers(void)
9232 {
9233         int ring_buf_size;
9234         int ret = -ENOMEM;
9235
9236
9237         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9238                 pr_warning("Tracing disabled due to lockdown\n");
9239                 return -EPERM;
9240         }
9241
9242         /*
9243          * Make sure we don't accidently add more trace options
9244          * than we have bits for.
9245          */
9246         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9247
9248         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9249                 goto out;
9250
9251         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9252                 goto out_free_buffer_mask;
9253
9254         /* Only allocate trace_printk buffers if a trace_printk exists */
9255         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9256                 /* Must be called before global_trace.buffer is allocated */
9257                 trace_printk_init_buffers();
9258
9259         /* To save memory, keep the ring buffer size to its minimum */
9260         if (ring_buffer_expanded)
9261                 ring_buf_size = trace_buf_size;
9262         else
9263                 ring_buf_size = 1;
9264
9265         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9266         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9267
9268         raw_spin_lock_init(&global_trace.start_lock);
9269
9270         /*
9271          * The prepare callbacks allocates some memory for the ring buffer. We
9272          * don't free the buffer if the if the CPU goes down. If we were to free
9273          * the buffer, then the user would lose any trace that was in the
9274          * buffer. The memory will be removed once the "instance" is removed.
9275          */
9276         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9277                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9278                                       NULL);
9279         if (ret < 0)
9280                 goto out_free_cpumask;
9281         /* Used for event triggers */
9282         ret = -ENOMEM;
9283         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9284         if (!temp_buffer)
9285                 goto out_rm_hp_state;
9286
9287         if (trace_create_savedcmd() < 0)
9288                 goto out_free_temp_buffer;
9289
9290         /* TODO: make the number of buffers hot pluggable with CPUS */
9291         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9292                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9293                 WARN_ON(1);
9294                 goto out_free_savedcmd;
9295         }
9296
9297         if (global_trace.buffer_disabled)
9298                 tracing_off();
9299
9300         if (trace_boot_clock) {
9301                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9302                 if (ret < 0)
9303                         pr_warn("Trace clock %s not defined, going back to default\n",
9304                                 trace_boot_clock);
9305         }
9306
9307         /*
9308          * register_tracer() might reference current_trace, so it
9309          * needs to be set before we register anything. This is
9310          * just a bootstrap of current_trace anyway.
9311          */
9312         global_trace.current_trace = &nop_trace;
9313
9314         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9315
9316         ftrace_init_global_array_ops(&global_trace);
9317
9318         init_trace_flags_index(&global_trace);
9319
9320         register_tracer(&nop_trace);
9321
9322         /* Function tracing may start here (via kernel command line) */
9323         init_function_trace();
9324
9325         /* All seems OK, enable tracing */
9326         tracing_disabled = 0;
9327
9328         atomic_notifier_chain_register(&panic_notifier_list,
9329                                        &trace_panic_notifier);
9330
9331         register_die_notifier(&trace_die_notifier);
9332
9333         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9334
9335         INIT_LIST_HEAD(&global_trace.systems);
9336         INIT_LIST_HEAD(&global_trace.events);
9337         INIT_LIST_HEAD(&global_trace.hist_vars);
9338         INIT_LIST_HEAD(&global_trace.err_log);
9339         list_add(&global_trace.list, &ftrace_trace_arrays);
9340
9341         apply_trace_boot_options();
9342
9343         register_snapshot_cmd();
9344
9345         return 0;
9346
9347 out_free_savedcmd:
9348         free_saved_cmdlines_buffer(savedcmd);
9349 out_free_temp_buffer:
9350         ring_buffer_free(temp_buffer);
9351 out_rm_hp_state:
9352         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9353 out_free_cpumask:
9354         free_cpumask_var(global_trace.tracing_cpumask);
9355 out_free_buffer_mask:
9356         free_cpumask_var(tracing_buffer_mask);
9357 out:
9358         return ret;
9359 }
9360
9361 void __init early_trace_init(void)
9362 {
9363         if (tracepoint_printk) {
9364                 tracepoint_print_iter =
9365                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9366                 if (WARN_ON(!tracepoint_print_iter))
9367                         tracepoint_printk = 0;
9368                 else
9369                         static_key_enable(&tracepoint_printk_key.key);
9370         }
9371         tracer_alloc_buffers();
9372
9373         init_events();
9374 }
9375
9376 void __init trace_init(void)
9377 {
9378         trace_event_init();
9379 }
9380
9381 __init static int clear_boot_tracer(void)
9382 {
9383         /*
9384          * The default tracer at boot buffer is an init section.
9385          * This function is called in lateinit. If we did not
9386          * find the boot tracer, then clear it out, to prevent
9387          * later registration from accessing the buffer that is
9388          * about to be freed.
9389          */
9390         if (!default_bootup_tracer)
9391                 return 0;
9392
9393         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9394                default_bootup_tracer);
9395         default_bootup_tracer = NULL;
9396
9397         return 0;
9398 }
9399
9400 fs_initcall(tracer_init_tracefs);
9401 late_initcall_sync(clear_boot_tracer);
9402
9403 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9404 __init static int tracing_set_default_clock(void)
9405 {
9406         /* sched_clock_stable() is determined in late_initcall */
9407         if (!trace_boot_clock && !sched_clock_stable()) {
9408                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9409                         pr_warn("Can not set tracing clock due to lockdown\n");
9410                         return -EPERM;
9411                 }
9412
9413                 printk(KERN_WARNING
9414                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9415                        "If you want to keep using the local clock, then add:\n"
9416                        "  \"trace_clock=local\"\n"
9417                        "on the kernel command line\n");
9418                 tracing_set_clock(&global_trace, "global");
9419         }
9420
9421         return 0;
9422 }
9423 late_initcall_sync(tracing_set_default_clock);
9424 #endif