GNU Linux-libre 4.19.211-gnu1
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, but concurrent
61  * insertions into the ring buffer (such as trace_printk) could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 and is cleared to zero only if the
97  * initialization of the tracer succeeds; that is the only place
98  * that clears it.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * from "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
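
/*
 * Illustrative sketch, not part of the original file: given the layout
 * described above, walking one chain of saved maps would look roughly
 * like the hypothetical helper below (the real walkers live elsewhere
 * in the tracing code; the function name here is an assumption).
 */
static void __maybe_unused trace_eval_maps_walk_example(union trace_eval_map_item *ptr)
{
        while (ptr) {
                /* the first item of each block is the head */
                unsigned long len = ptr->head.length;
                union trace_eval_map_item *map = ptr + 1;

                /* 'len' real trace_eval_map entries follow the head */
                while (len--) {
                        pr_debug("eval %s = %lu\n",
                                 map->map.eval_string, map->map.eval_value);
                        map++;
                }

                /* after the maps comes the tail, which chains to the next block */
                ptr = map->tail.next;
        }
}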
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork, where tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
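
/*
 * Illustrative sketch, not part of the original file: a hypothetical pair
 * of fork/exit hooks that keep a pid_list in sync using the helper above.
 * The real users obtain 'pid_list' under their own RCU/locking rules.
 */
static void __maybe_unused example_pid_filter_on_fork(struct trace_pid_list *pid_list,
                                                      struct task_struct *parent,
                                                      struct task_struct *child)
{
        /* the child is added only if its parent is already in the list */
        trace_filter_add_remove_task(pid_list, parent, child);
}

static void __maybe_unused example_pid_filter_on_exit(struct trace_pid_list *pid_list,
                                                      struct task_struct *task)
{
        /* passing NULL for @self removes the exiting task's pid */
        trace_filter_add_remove_task(pid_list, NULL, task);
}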
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
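
/*
 * Illustrative sketch, not part of the original file: how the three helpers
 * above are typically wired into seq_file operations. 'example_pid_list'
 * and the wrapper names are assumptions; the real users provide their own
 * wrappers that also take the locks protecting their pid list.
 */
static struct trace_pid_list *example_pid_list;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
        return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
        /* nothing to release in this simplified sketch */
}

static const struct seq_operations example_pids_sops __maybe_unused = {
        .start  = example_pids_start,
        .stop   = example_pids_stop,
        .next   = example_pids_next,
        .show   = trace_pid_show,
};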
472
473 /* 127 characters plus the terminating NUL (128 total) should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always recreate a new array. The write is an all-or-nothing
494          * operation: a new array is always created when the user adds
495          * new pids, so that if the operation fails the current list
496          * is left unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list) {
500                 trace_parser_put(&parser);
501                 return -ENOMEM;
502         }
503
504         pid_list->pid_max = READ_ONCE(pid_max);
505
506         /* Only truncating will shrink pid_max */
507         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508                 pid_list->pid_max = filtered_pids->pid_max;
509
510         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511         if (!pid_list->pids) {
512                 trace_parser_put(&parser);
513                 kfree(pid_list);
514                 return -ENOMEM;
515         }
516
517         if (filtered_pids) {
518                 /* copy the current bits to the new max */
519                 for_each_set_bit(pid, filtered_pids->pids,
520                                  filtered_pids->pid_max) {
521                         set_bit(pid, pid_list->pids);
522                         nr_pids++;
523                 }
524         }
525
526         while (cnt > 0) {
527
528                 pos = 0;
529
530                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
531                 if (ret < 0 || !trace_parser_loaded(&parser))
532                         break;
533
534                 read += ret;
535                 ubuf += ret;
536                 cnt -= ret;
537
538                 ret = -EINVAL;
539                 if (kstrtoul(parser.buffer, 0, &val))
540                         break;
541                 if (val >= pid_list->pid_max)
542                         break;
543
544                 pid = (pid_t)val;
545
546                 set_bit(pid, pid_list->pids);
547                 nr_pids++;
548
549                 trace_parser_clear(&parser);
550                 ret = 0;
551         }
552         trace_parser_put(&parser);
553
554         if (ret < 0) {
555                 trace_free_pid_list(pid_list);
556                 return ret;
557         }
558
559         if (!nr_pids) {
560                 /* Cleared the list of pids */
561                 trace_free_pid_list(pid_list);
562                 read = ret;
563                 pid_list = NULL;
564         }
565
566         *new_pid_list = pid_list;
567
568         return read;
569 }
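
/*
 * Illustrative sketch, not part of the original file: a hypothetical write
 * handler swapping in the list built by trace_pid_write(). The real callers
 * publish the new pointer with rcu_assign_pointer() and hold their own
 * mutex around the swap; that is omitted here for brevity.
 */
static struct trace_pid_list *example_filtered_pids;

static ssize_t __maybe_unused
example_pid_filter_write(struct file *filp, const char __user *ubuf,
                         size_t cnt, loff_t *ppos)
{
        struct trace_pid_list *old_list = example_filtered_pids;
        struct trace_pid_list *new_list = NULL;
        ssize_t ret;

        ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
        if (ret < 0)
                return ret;

        /* an empty write clears the filter: new_list may be NULL here */
        example_filtered_pids = new_list;
        if (old_list)
                trace_free_pid_list(old_list);

        if (ret > 0)
                *ppos += ret;
        return ret;
}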
570
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573         u64 ts;
574
575         /* Early boot up does not have a buffer yet */
576         if (!buf->buffer)
577                 return trace_clock_local();
578
579         ts = ring_buffer_time_stamp(buf->buffer, cpu);
580         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582         return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled", which is meant for fast paths such as
595  * the irqsoff tracer. But it may be inaccurate due to races. If you
596  * need to know the accurate state, use tracing_is_on() which is a little
597  * slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601         /*
602          * For quick access (irqsoff uses this in fast path), just
603          * return the mirror variable of the state of the ring buffer.
604          * It's a little racy, but we don't really care.
605          */
606         smp_rmb();
607         return !global_trace.buffer_disabled;
608 }
609
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low number of 16384.
616  * If a dump on oops happens, it is much appreciated not to have
617  * to wait for all that output. In any case, this is configurable
618  * at both boot time and run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer            *trace_types __read_mostly;
626
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633  * Serialize access to the ring buffer.
634  *
635  * The ring buffer serializes readers, but that is only low-level protection.
636  * The validity of the events (returned by ring_buffer_peek() etc.)
637  * is not protected by the ring buffer.
638  *
639  * The content of events may become garbage if we allow another process
640  * to consume these events concurrently:
641  *   A) the page of the consumed events may become a normal page
642  *      (not a reader page) in the ring buffer, and this page will be
643  *      rewritten by the event producer.
644  *   B) the page of the consumed events may become a page for splice_read,
645  *      and this page will be returned to the system.
646  *
647  * These primitives allow multiple processes to access different per-cpu
648  * ring buffers concurrently.
649  *
650  * These primitives don't distinguish read-only and read-consume access.
651  * Multiple read-only accesses are also serialized.
652  */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660         if (cpu == RING_BUFFER_ALL_CPUS) {
661                 /* gain it for accessing the whole ring buffer. */
662                 down_write(&all_cpu_access_lock);
663         } else {
664                 /* gain it for accessing a cpu ring buffer. */
665
666                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667                 down_read(&all_cpu_access_lock);
668
669                 /* Secondly block other access to this @cpu ring buffer. */
670                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671         }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676         if (cpu == RING_BUFFER_ALL_CPUS) {
677                 up_write(&all_cpu_access_lock);
678         } else {
679                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680                 up_read(&all_cpu_access_lock);
681         }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686         int cpu;
687
688         for_each_possible_cpu(cpu)
689                 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698         (void)cpu;
699         mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704         (void)cpu;
705         mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
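
/*
 * Illustrative sketch, not part of the original file: the reader-side
 * pattern the primitives above are meant for, shown for a single CPU.
 * Events consumed between the lock and unlock stay valid for that window;
 * the "process the event" step is a placeholder.
 */
static void __maybe_unused example_consume_cpu_events(struct trace_array *tr, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        trace_access_lock(cpu);
        while ((event = ring_buffer_consume(tr->trace_buffer.buffer, cpu,
                                            &ts, NULL)))
                ; /* process the event */
        trace_access_unlock(cpu);
}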
713
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716                                  unsigned long flags,
717                                  int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719                                       struct ring_buffer *buffer,
720                                       unsigned long flags,
721                                       int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725                                         unsigned long flags,
726                                         int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730                                       struct ring_buffer *buffer,
731                                       unsigned long flags,
732                                       int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740                   int type, unsigned long flags, int pc)
741 {
742         struct trace_entry *ent = ring_buffer_event_data(event);
743
744         tracing_generic_entry_update(ent, flags, pc);
745         ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750                           int type,
751                           unsigned long len,
752                           unsigned long flags, int pc)
753 {
754         struct ring_buffer_event *event;
755
756         event = ring_buffer_lock_reserve(buffer, len);
757         if (event != NULL)
758                 trace_event_setup(event, type, flags, pc);
759
760         return event;
761 }
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765         if (tr->trace_buffer.buffer)
766                 ring_buffer_record_on(tr->trace_buffer.buffer);
767         /*
768          * This flag is looked at when buffers haven't been allocated
769          * yet, or by some tracers (like irqsoff), that just want to
770          * know if the ring buffer has been disabled, but it can handle
771          * races of where it gets disabled but we still do a record.
772          * As the check is in the fast path of the tracers, it is more
773          * important to be fast than accurate.
774          */
775         tr->buffer_disabled = 0;
776         /* Make the flag seen by readers */
777         smp_wmb();
778 }
779
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788         tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796         __this_cpu_write(trace_taskinfo_save, true);
797
798         /* If this is the temp buffer, we need to commit fully */
799         if (this_cpu_read(trace_buffered_event) == event) {
800                 /* Length is in event->array[0] */
801                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802                 /* Release the temp buffer */
803                 this_cpu_dec(trace_buffered_event_cnt);
804         } else
805                 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:    The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816         struct ring_buffer_event *event;
817         struct ring_buffer *buffer;
818         struct print_entry *entry;
819         unsigned long irq_flags;
820         int alloc;
821         int pc;
822
823         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824                 return 0;
825
826         pc = preempt_count();
827
828         if (unlikely(tracing_selftest_running || tracing_disabled))
829                 return 0;
830
831         alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833         local_save_flags(irq_flags);
834         buffer = global_trace.trace_buffer.buffer;
835         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
836                                             irq_flags, pc);
837         if (!event)
838                 return 0;
839
840         entry = ring_buffer_event_data(event);
841         entry->ip = ip;
842
843         memcpy(&entry->buf, str, size);
844
845         /* Add a newline if necessary */
846         if (entry->buf[size - 1] != '\n') {
847                 entry->buf[size] = '\n';
848                 entry->buf[size + 1] = '\0';
849         } else
850                 entry->buf[size] = '\0';
851
852         __buffer_unlock_commit(buffer, event);
853         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855         return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
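
/*
 * Illustrative sketch, not part of the original file: callers normally do
 * not call __trace_puts() directly but go through the trace_puts() macro
 * (linux/kernel.h), which picks __trace_bputs() for build-time constant
 * strings and falls back to __trace_puts() otherwise.
 */
static void __maybe_unused example_trace_puts_user(void)
{
        trace_puts("example: reached the interesting code path\n");
}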
858
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:    The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866         struct ring_buffer_event *event;
867         struct ring_buffer *buffer;
868         struct bputs_entry *entry;
869         unsigned long irq_flags;
870         int size = sizeof(struct bputs_entry);
871         int pc;
872
873         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874                 return 0;
875
876         pc = preempt_count();
877
878         if (unlikely(tracing_selftest_running || tracing_disabled))
879                 return 0;
880
881         local_save_flags(irq_flags);
882         buffer = global_trace.trace_buffer.buffer;
883         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884                                             irq_flags, pc);
885         if (!event)
886                 return 0;
887
888         entry = ring_buffer_event_data(event);
889         entry->ip                       = ip;
890         entry->str                      = str;
891
892         __buffer_unlock_commit(buffer, event);
893         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895         return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902         struct tracer *tracer = tr->current_trace;
903         unsigned long flags;
904
905         if (in_nmi()) {
906                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907                 internal_trace_puts("*** snapshot is being ignored        ***\n");
908                 return;
909         }
910
911         if (!tr->allocated_snapshot) {
912                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913                 internal_trace_puts("*** stopping trace here!   ***\n");
914                 tracing_off();
915                 return;
916         }
917
918         /* Note, snapshot can not be used when the tracer uses it */
919         if (tracer->use_max_tr) {
920                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922                 return;
923         }
924
925         local_irq_save(flags);
926         update_max_tr(tr, current, smp_processor_id());
927         local_irq_restore(flags);
928 }
929
930 /**
931  * tracing_snapshot - take a snapshot of the current buffer.
932  *
933  * This causes a swap between the snapshot buffer and the current live
934  * tracing buffer. You can use this to take snapshots of the live
935  * trace when some condition is triggered, but continue to trace.
936  *
937  * Note, make sure to allocate the snapshot with either
938  * tracing_snapshot_alloc(), or manually with:
939  * echo 1 > /sys/kernel/debug/tracing/snapshot
940  *
941  * If the snapshot buffer is not allocated, this will stop tracing,
942  * basically making a permanent snapshot.
943  */
944 void tracing_snapshot(void)
945 {
946         struct trace_array *tr = &global_trace;
947
948         tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
951
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953                                         struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958         int ret;
959
960         if (!tr->allocated_snapshot) {
961
962                 /* allocate spare buffer */
963                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
964                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965                 if (ret < 0)
966                         return ret;
967
968                 tr->allocated_snapshot = true;
969         }
970
971         return 0;
972 }
973
974 static void free_snapshot(struct trace_array *tr)
975 {
976         /*
977          * We don't free the ring buffer; instead, we resize it, because
978          * the max_tr ring buffer has some state (e.g. ring->clock) that
979          * we want to preserve.
980          */
981         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982         set_buffer_entries(&tr->max_buffer, 1);
983         tracing_reset_online_cpus(&tr->max_buffer);
984         tr->allocated_snapshot = false;
985 }
986
987 /**
988  * tracing_alloc_snapshot - allocate snapshot buffer.
989  *
990  * This only allocates the snapshot buffer if it isn't already
991  * allocated - it doesn't also take a snapshot.
992  *
993  * This is meant to be used in cases where the snapshot buffer needs
994  * to be set up for events that can't sleep but need to be able to
995  * trigger a snapshot.
996  */
997 int tracing_alloc_snapshot(void)
998 {
999         struct trace_array *tr = &global_trace;
1000         int ret;
1001
1002         ret = tracing_alloc_snapshot_instance(tr);
1003         WARN_ON(ret < 0);
1004
1005         return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008
1009 /**
1010  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011  *
1012  * This is similar to tracing_snapshot(), but it will allocate the
1013  * snapshot buffer if it isn't already allocated. Use this only
1014  * where it is safe to sleep, as the allocation may sleep.
1015  *
1016  * This causes a swap between the snapshot buffer and the current live
1017  * tracing buffer. You can use this to take snapshots of the live
1018  * trace when some condition is triggered, but continue to trace.
1019  */
1020 void tracing_snapshot_alloc(void)
1021 {
1022         int ret;
1023
1024         ret = tracing_alloc_snapshot();
1025         if (ret < 0)
1026                 return;
1027
1028         tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
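
/*
 * Illustrative sketch, not part of the original file: the intended split is
 * to allocate the spare buffer once from a context that may sleep, and then
 * take snapshots cheaply when an interesting condition hits. The function
 * names are assumptions.
 */
static int __maybe_unused example_snapshot_setup(void)
{
        /* may sleep; safe to call more than once */
        return tracing_alloc_snapshot();
}

static void __maybe_unused example_snapshot_on_condition(void)
{
        /* swaps the live buffer with the (already allocated) spare buffer */
        tracing_snapshot();
}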
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040         return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045         /* Give warning */
1046         tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053         if (tr->trace_buffer.buffer)
1054                 ring_buffer_record_off(tr->trace_buffer.buffer);
1055         /*
1056          * This flag is looked at when buffers haven't been allocated
1057          * yet, or by some tracers (like irqsoff), that just want to
1058          * know if the ring buffer has been disabled, but it can handle
1059          * races where it gets disabled while we still do a record.
1060          * As the check is in the fast path of the tracers, it is more
1061          * important to be fast than accurate.
1062          */
1063         tr->buffer_disabled = 1;
1064         /* Make the flag seen by readers */
1065         smp_wmb();
1066 }
1067
1068 /**
1069  * tracing_off - turn off tracing buffers
1070  *
1071  * This function stops the tracing buffers from recording data.
1072  * It does not disable any overhead the tracers themselves may
1073  * be causing. This function simply causes all recording to
1074  * the ring buffers to fail.
1075  */
1076 void tracing_off(void)
1077 {
1078         tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081
1082 void disable_trace_on_warning(void)
1083 {
1084         if (__disable_trace_on_warning)
1085                 tracing_off();
1086 }
1087
1088 /**
1089  * tracer_tracing_is_on - show real state of ring buffer enabled
1090  * @tr : the trace array to know if ring buffer is enabled
1091  *
1092  * Shows real state of the ring buffer if it is enabled or not.
1093  */
1094 bool tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096         if (tr->trace_buffer.buffer)
1097                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098         return !tr->buffer_disabled;
1099 }
1100
1101 /**
1102  * tracing_is_on - show state of ring buffers enabled
1103  */
1104 int tracing_is_on(void)
1105 {
1106         return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109
1110 static int __init set_buf_size(char *str)
1111 {
1112         unsigned long buf_size;
1113
1114         if (!str)
1115                 return 0;
1116         buf_size = memparse(str, &str);
1117         /* nr_entries can not be zero */
1118         if (buf_size == 0)
1119                 return 0;
1120         trace_buf_size = buf_size;
1121         return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127         unsigned long threshold;
1128         int ret;
1129
1130         if (!str)
1131                 return 0;
1132         ret = kstrtoul(str, 0, &threshold);
1133         if (ret < 0)
1134                 return 0;
1135         tracing_thresh = threshold * 1000;
1136         return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142         return nsecs / 1000;
1143 }
1144
1145 /*
1146  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149  * of strings in the order that the evals (enum) were defined.
1150  */
1151 #undef C
1152 #define C(a, b) b
1153
1154 /* These must match the bit positions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156         TRACE_FLAGS
1157         NULL
1158 };
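
/*
 * Illustrative sketch, not part of the original file: the same idea with
 * hypothetical names, using a parameterized X-macro instead of redefining
 * C(). A single list expands into both the enum bits and their strings,
 * keeping the two in sync by construction.
 */
#define EXAMPLE_FLAGS(EC)               \
        EC(FOO,  "foo")                 \
        EC(BAR,  "bar")

#define EXAMPLE_ENUM_ENTRY(a, b)        EXAMPLE_ITER_##a##_BIT,
#define EXAMPLE_NAME_ENTRY(a, b)        b,

enum { EXAMPLE_FLAGS(EXAMPLE_ENUM_ENTRY) EXAMPLE_ITER_LAST_BIT };

static const char * const example_options[] __maybe_unused = {
        EXAMPLE_FLAGS(EXAMPLE_NAME_ENTRY)
        NULL
};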
1159
1160 static struct {
1161         u64 (*func)(void);
1162         const char *name;
1163         int in_ns;              /* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165         { trace_clock_local,            "local",        1 },
1166         { trace_clock_global,           "global",       1 },
1167         { trace_clock_counter,          "counter",      0 },
1168         { trace_clock_jiffies,          "uptime",       0 },
1169         { trace_clock,                  "perf",         1 },
1170         { ktime_get_mono_fast_ns,       "mono",         1 },
1171         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1172         { ktime_get_boot_fast_ns,       "boot",         1 },
1173         ARCH_TRACE_CLOCKS
1174 };
1175
1176 bool trace_clock_in_ns(struct trace_array *tr)
1177 {
1178         if (trace_clocks[tr->clock_id].in_ns)
1179                 return true;
1180
1181         return false;
1182 }
1183
1184 /*
1185  * trace_parser_get_init - gets the buffer for trace parser
1186  */
1187 int trace_parser_get_init(struct trace_parser *parser, int size)
1188 {
1189         memset(parser, 0, sizeof(*parser));
1190
1191         parser->buffer = kmalloc(size, GFP_KERNEL);
1192         if (!parser->buffer)
1193                 return 1;
1194
1195         parser->size = size;
1196         return 0;
1197 }
1198
1199 /*
1200  * trace_parser_put - frees the buffer for trace parser
1201  */
1202 void trace_parser_put(struct trace_parser *parser)
1203 {
1204         kfree(parser->buffer);
1205         parser->buffer = NULL;
1206 }
1207
1208 /*
1209  * trace_get_user - reads the user input string separated by space
1210  * (matched by isspace(ch))
1211  *
1212  * For each string found the 'struct trace_parser' is updated,
1213  * and the function returns.
1214  *
1215  * Returns number of bytes read.
1216  *
1217  * See kernel/trace/trace.h for 'struct trace_parser' details.
1218  */
1219 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1220         size_t cnt, loff_t *ppos)
1221 {
1222         char ch;
1223         size_t read = 0;
1224         ssize_t ret;
1225
1226         if (!*ppos)
1227                 trace_parser_clear(parser);
1228
1229         ret = get_user(ch, ubuf++);
1230         if (ret)
1231                 goto out;
1232
1233         read++;
1234         cnt--;
1235
1236         /*
1237          * The parser is not finished with the last write,
1238          * continue reading the user input without skipping spaces.
1239          */
1240         if (!parser->cont) {
1241                 /* skip white space */
1242                 while (cnt && isspace(ch)) {
1243                         ret = get_user(ch, ubuf++);
1244                         if (ret)
1245                                 goto out;
1246                         read++;
1247                         cnt--;
1248                 }
1249
1250                 parser->idx = 0;
1251
1252                 /* only spaces were written */
1253                 if (isspace(ch) || !ch) {
1254                         *ppos += read;
1255                         ret = read;
1256                         goto out;
1257                 }
1258         }
1259
1260         /* read the non-space input */
1261         while (cnt && !isspace(ch) && ch) {
1262                 if (parser->idx < parser->size - 1)
1263                         parser->buffer[parser->idx++] = ch;
1264                 else {
1265                         ret = -EINVAL;
1266                         goto out;
1267                 }
1268                 ret = get_user(ch, ubuf++);
1269                 if (ret)
1270                         goto out;
1271                 read++;
1272                 cnt--;
1273         }
1274
1275         /* We either got finished input or we have to wait for another call. */
1276         if (isspace(ch) || !ch) {
1277                 parser->buffer[parser->idx] = 0;
1278                 parser->cont = false;
1279         } else if (parser->idx < parser->size - 1) {
1280                 parser->cont = true;
1281                 parser->buffer[parser->idx++] = ch;
1282                 /* Make sure the parsed string always terminates with '\0'. */
1283                 parser->buffer[parser->idx] = 0;
1284         } else {
1285                 ret = -EINVAL;
1286                 goto out;
1287         }
1288
1289         *ppos += read;
1290         ret = read;
1291
1292 out:
1293         return ret;
1294 }
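
/*
 * Illustrative sketch, not part of the original file: the usual calling
 * pattern (compare trace_pid_write() above). Each successful iteration
 * leaves one whitespace-separated token in parser.buffer.
 */
static ssize_t __maybe_unused
example_parse_tokens(const char __user *ubuf, size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read = 0;
        ssize_t ret = 0;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        while (cnt > 0) {
                ret = trace_get_user(&parser, ubuf, cnt, ppos);
                if (ret < 0 || !trace_parser_loaded(&parser))
                        break;

                read += ret;
                ubuf += ret;
                cnt -= ret;

                pr_debug("token: %s\n", parser.buffer);
                trace_parser_clear(&parser);
        }

        trace_parser_put(&parser);
        return ret < 0 ? ret : read;
}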
1295
1296 /* TODO add a seq_buf_to_buffer() */
1297 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1298 {
1299         int len;
1300
1301         if (trace_seq_used(s) <= s->seq.readpos)
1302                 return -EBUSY;
1303
1304         len = trace_seq_used(s) - s->seq.readpos;
1305         if (cnt > len)
1306                 cnt = len;
1307         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1308
1309         s->seq.readpos += cnt;
1310         return cnt;
1311 }
1312
1313 unsigned long __read_mostly     tracing_thresh;
1314
1315 #ifdef CONFIG_TRACER_MAX_TRACE
1316 /*
1317  * Copy the new maximum trace into the separate maximum-trace
1318  * structure. (this way the maximum trace is permanently saved,
1319  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1320  */
1321 static void
1322 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1323 {
1324         struct trace_buffer *trace_buf = &tr->trace_buffer;
1325         struct trace_buffer *max_buf = &tr->max_buffer;
1326         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1327         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1328
1329         max_buf->cpu = cpu;
1330         max_buf->time_start = data->preempt_timestamp;
1331
1332         max_data->saved_latency = tr->max_latency;
1333         max_data->critical_start = data->critical_start;
1334         max_data->critical_end = data->critical_end;
1335
1336         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1337         max_data->pid = tsk->pid;
1338         /*
1339          * If tsk == current, then use current_uid(), as that does not use
1340          * RCU. The irq tracer can be called out of RCU scope.
1341          */
1342         if (tsk == current)
1343                 max_data->uid = current_uid();
1344         else
1345                 max_data->uid = task_uid(tsk);
1346
1347         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1348         max_data->policy = tsk->policy;
1349         max_data->rt_priority = tsk->rt_priority;
1350
1351         /* record this task's comm */
1352         tracing_record_cmdline(tsk);
1353 }
1354
1355 /**
1356  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1357  * @tr: tracer
1358  * @tsk: the task with the latency
1359  * @cpu: The cpu that initiated the trace.
1360  *
1361  * Flip the buffers between the @tr and the max_tr and record information
1362  * about which task was the cause of this latency.
1363  */
1364 void
1365 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1366 {
1367         if (tr->stop_count)
1368                 return;
1369
1370         WARN_ON_ONCE(!irqs_disabled());
1371
1372         if (!tr->allocated_snapshot) {
1373                 /* Only the nop tracer should hit this when disabling */
1374                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1375                 return;
1376         }
1377
1378         arch_spin_lock(&tr->max_lock);
1379
1380         /* Inherit the recordable setting from trace_buffer */
1381         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1382                 ring_buffer_record_on(tr->max_buffer.buffer);
1383         else
1384                 ring_buffer_record_off(tr->max_buffer.buffer);
1385
1386         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1387
1388         __update_max_tr(tr, tsk, cpu);
1389         arch_spin_unlock(&tr->max_lock);
1390 }
1391
1392 /**
1393  * update_max_tr_single - only copy one trace over, and reset the rest
1394  * @tr: tracer
1395  * @tsk: task with the latency
1396  * @cpu: the cpu of the buffer to copy.
1397  *
1398  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1399  */
1400 void
1401 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1402 {
1403         int ret;
1404
1405         if (tr->stop_count)
1406                 return;
1407
1408         WARN_ON_ONCE(!irqs_disabled());
1409         if (!tr->allocated_snapshot) {
1410                 /* Only the nop tracer should hit this when disabling */
1411                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1412                 return;
1413         }
1414
1415         arch_spin_lock(&tr->max_lock);
1416
1417         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1418
1419         if (ret == -EBUSY) {
1420                 /*
1421                  * We failed to swap the buffer due to a commit taking
1422                  * place on this CPU. We fail to record, but we reset
1423                  * the max trace buffer (no one writes directly to it)
1424                  * and flag that it failed.
1425                  */
1426                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1427                         "Failed to swap buffers due to commit in progress\n");
1428         }
1429
1430         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1431
1432         __update_max_tr(tr, tsk, cpu);
1433         arch_spin_unlock(&tr->max_lock);
1434 }
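
/*
 * Illustrative sketch, not part of the original file: roughly how a latency
 * tracer consumes update_max_tr() when it sees a new maximum. It assumes
 * the caller runs with interrupts disabled, as the latency tracers do, and
 * the latency computation itself is a placeholder.
 */
static void __maybe_unused
example_report_latency(struct trace_array *tr, unsigned long delta)
{
        if (tr->stop_count)
                return;

        if (delta > tr->max_latency) {
                tr->max_latency = delta;
                /* snapshot the buffers and record current as the culprit */
                update_max_tr(tr, current, smp_processor_id());
        }
}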
1435 #endif /* CONFIG_TRACER_MAX_TRACE */
1436
1437 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1438 {
1439         /* Iterators are static, they should be filled or empty */
1440         if (trace_buffer_iter(iter, iter->cpu_file))
1441                 return 0;
1442
1443         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1444                                 full);
1445 }
1446
1447 #ifdef CONFIG_FTRACE_STARTUP_TEST
1448 static bool selftests_can_run;
1449
1450 struct trace_selftests {
1451         struct list_head                list;
1452         struct tracer                   *type;
1453 };
1454
1455 static LIST_HEAD(postponed_selftests);
1456
1457 static int save_selftest(struct tracer *type)
1458 {
1459         struct trace_selftests *selftest;
1460
1461         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1462         if (!selftest)
1463                 return -ENOMEM;
1464
1465         selftest->type = type;
1466         list_add(&selftest->list, &postponed_selftests);
1467         return 0;
1468 }
1469
1470 static int run_tracer_selftest(struct tracer *type)
1471 {
1472         struct trace_array *tr = &global_trace;
1473         struct tracer *saved_tracer = tr->current_trace;
1474         int ret;
1475
1476         if (!type->selftest || tracing_selftest_disabled)
1477                 return 0;
1478
1479         /*
1480          * If a tracer registers early in boot up (before scheduling is
1481          * initialized and such), then do not run its selftests yet.
1482          * Instead, run it a little later in the boot process.
1483          */
1484         if (!selftests_can_run)
1485                 return save_selftest(type);
1486
1487         /*
1488          * Run a selftest on this tracer.
1489          * Here we reset the trace buffer, and set the current
1490          * tracer to be this tracer. The tracer can then run some
1491          * internal tracing to verify that everything is in order.
1492          * If we fail, we do not register this tracer.
1493          */
1494         tracing_reset_online_cpus(&tr->trace_buffer);
1495
1496         tr->current_trace = type;
1497
1498 #ifdef CONFIG_TRACER_MAX_TRACE
1499         if (type->use_max_tr) {
1500                 /* If we expanded the buffers, make sure the max is expanded too */
1501                 if (ring_buffer_expanded)
1502                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1503                                            RING_BUFFER_ALL_CPUS);
1504                 tr->allocated_snapshot = true;
1505         }
1506 #endif
1507
1508         /* the test is responsible for initializing and enabling */
1509         pr_info("Testing tracer %s: ", type->name);
1510         ret = type->selftest(type, tr);
1511         /* the test is responsible for resetting too */
1512         tr->current_trace = saved_tracer;
1513         if (ret) {
1514                 printk(KERN_CONT "FAILED!\n");
1515                 /* Add the warning after printing 'FAILED' */
1516                 WARN_ON(1);
1517                 return -1;
1518         }
1519         /* Only reset on passing, to avoid touching corrupted buffers */
1520         tracing_reset_online_cpus(&tr->trace_buffer);
1521
1522 #ifdef CONFIG_TRACER_MAX_TRACE
1523         if (type->use_max_tr) {
1524                 tr->allocated_snapshot = false;
1525
1526                 /* Shrink the max buffer again */
1527                 if (ring_buffer_expanded)
1528                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1529                                            RING_BUFFER_ALL_CPUS);
1530         }
1531 #endif
1532
1533         printk(KERN_CONT "PASSED\n");
1534         return 0;
1535 }
1536
1537 static __init int init_trace_selftests(void)
1538 {
1539         struct trace_selftests *p, *n;
1540         struct tracer *t, **last;
1541         int ret;
1542
1543         selftests_can_run = true;
1544
1545         mutex_lock(&trace_types_lock);
1546
1547         if (list_empty(&postponed_selftests))
1548                 goto out;
1549
1550         pr_info("Running postponed tracer tests:\n");
1551
1552         tracing_selftest_running = true;
1553         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1554                 ret = run_tracer_selftest(p->type);
1555                 /* If the test fails, then warn and remove from available_tracers */
1556                 if (ret < 0) {
1557                         WARN(1, "tracer: %s failed selftest, disabling\n",
1558                              p->type->name);
1559                         last = &trace_types;
1560                         for (t = trace_types; t; t = t->next) {
1561                                 if (t == p->type) {
1562                                         *last = t->next;
1563                                         break;
1564                                 }
1565                                 last = &t->next;
1566                         }
1567                 }
1568                 list_del(&p->list);
1569                 kfree(p);
1570         }
1571         tracing_selftest_running = false;
1572
1573  out:
1574         mutex_unlock(&trace_types_lock);
1575
1576         return 0;
1577 }
1578 core_initcall(init_trace_selftests);
1579 #else
1580 static inline int run_tracer_selftest(struct tracer *type)
1581 {
1582         return 0;
1583 }
1584 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1585
1586 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1587
1588 static void __init apply_trace_boot_options(void);
1589
1590 /**
1591  * register_tracer - register a tracer with the ftrace system.
1592  * @type - the plugin for the tracer
1593  *
1594  * Register a new plugin tracer.
1595  */
1596 int __init register_tracer(struct tracer *type)
1597 {
1598         struct tracer *t;
1599         int ret = 0;
1600
1601         if (!type->name) {
1602                 pr_info("Tracer must have a name\n");
1603                 return -1;
1604         }
1605
1606         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1607                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1608                 return -1;
1609         }
1610
1611         mutex_lock(&trace_types_lock);
1612
1613         tracing_selftest_running = true;
1614
1615         for (t = trace_types; t; t = t->next) {
1616                 if (strcmp(type->name, t->name) == 0) {
1617                         /* already found */
1618                         pr_info("Tracer %s already registered\n",
1619                                 type->name);
1620                         ret = -1;
1621                         goto out;
1622                 }
1623         }
1624
1625         if (!type->set_flag)
1626                 type->set_flag = &dummy_set_flag;
1627         if (!type->flags) {
1628                 /* allocate a dummy tracer_flags */
1629                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1630                 if (!type->flags) {
1631                         ret = -ENOMEM;
1632                         goto out;
1633                 }
1634                 type->flags->val = 0;
1635                 type->flags->opts = dummy_tracer_opt;
1636         } else
1637                 if (!type->flags->opts)
1638                         type->flags->opts = dummy_tracer_opt;
1639
1640         /* store the tracer for __set_tracer_option */
1641         type->flags->trace = type;
1642
1643         ret = run_tracer_selftest(type);
1644         if (ret < 0)
1645                 goto out;
1646
1647         type->next = trace_types;
1648         trace_types = type;
1649         add_tracer_options(&global_trace, type);
1650
1651  out:
1652         tracing_selftest_running = false;
1653         mutex_unlock(&trace_types_lock);
1654
1655         if (ret || !default_bootup_tracer)
1656                 goto out_unlock;
1657
1658         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1659                 goto out_unlock;
1660
1661         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1662         /* Do we want this tracer to start on bootup? */
1663         tracing_set_tracer(&global_trace, type->name);
1664         default_bootup_tracer = NULL;
1665
1666         apply_trace_boot_options();
1667
1668         /* Disable other selftests, since this will break them. */
1669         tracing_selftest_disabled = true;
1670 #ifdef CONFIG_FTRACE_STARTUP_TEST
1671         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1672                type->name);
1673 #endif
1674
1675  out_unlock:
1676         return ret;
1677 }
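/*
 * Illustrative sketch (not part of the original source): a minimal
 * built-in tracer is typically registered from its own __init code,
 * along the lines of the nop tracer. The names below (sample_tracer,
 * sample_tracer_init, sample_tracer_reset) are hypothetical.
 *
 *	static struct tracer sample_tracer __read_mostly = {
 *		.name	= "sample",
 *		.init	= sample_tracer_init,
 *		.reset	= sample_tracer_reset,
 *	};
 *
 *	static int __init init_sample_tracer(void)
 *	{
 *		return register_tracer(&sample_tracer);
 *	}
 *	core_initcall(init_sample_tracer);
 */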
1678
1679 void tracing_reset(struct trace_buffer *buf, int cpu)
1680 {
1681         struct ring_buffer *buffer = buf->buffer;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690         ring_buffer_reset_cpu(buffer, cpu);
1691
1692         ring_buffer_record_enable(buffer);
1693 }
1694
1695 void tracing_reset_online_cpus(struct trace_buffer *buf)
1696 {
1697         struct ring_buffer *buffer = buf->buffer;
1698         int cpu;
1699
1700         if (!buffer)
1701                 return;
1702
1703         ring_buffer_record_disable(buffer);
1704
1705         /* Make sure all commits have finished */
1706         synchronize_sched();
1707
1708         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1709
1710         for_each_online_cpu(cpu)
1711                 ring_buffer_reset_cpu(buffer, cpu);
1712
1713         ring_buffer_record_enable(buffer);
1714 }
1715
1716 /* Must have trace_types_lock held */
1717 void tracing_reset_all_online_cpus(void)
1718 {
1719         struct trace_array *tr;
1720
1721         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1722                 if (!tr->clear_trace)
1723                         continue;
1724                 tr->clear_trace = false;
1725                 tracing_reset_online_cpus(&tr->trace_buffer);
1726 #ifdef CONFIG_TRACER_MAX_TRACE
1727                 tracing_reset_online_cpus(&tr->max_buffer);
1728 #endif
1729         }
1730 }
1731
1732 /*
1733  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1734  * is the tgid last observed corresponding to pid=i.
1735  */
1736 static int *tgid_map;
1737
1738 /* The maximum valid index into tgid_map. */
1739 static size_t tgid_map_max;
1740
1741 #define SAVED_CMDLINES_DEFAULT 128
1742 #define NO_CMDLINE_MAP UINT_MAX
1743 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1744 struct saved_cmdlines_buffer {
1745         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1746         unsigned *map_cmdline_to_pid;
1747         unsigned cmdline_num;
1748         int cmdline_idx;
1749         char *saved_cmdlines;
1750 };
1751 static struct saved_cmdlines_buffer *savedcmd;
1752
1753 static inline char *get_saved_cmdlines(int idx)
1754 {
1755         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1756 }
1757
1758 static inline void set_cmdline(int idx, const char *cmdline)
1759 {
1760         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1761 }
1762
1763 static int allocate_cmdlines_buffer(unsigned int val,
1764                                     struct saved_cmdlines_buffer *s)
1765 {
1766         s->map_cmdline_to_pid = kmalloc_array(val,
1767                                               sizeof(*s->map_cmdline_to_pid),
1768                                               GFP_KERNEL);
1769         if (!s->map_cmdline_to_pid)
1770                 return -ENOMEM;
1771
1772         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1773         if (!s->saved_cmdlines) {
1774                 kfree(s->map_cmdline_to_pid);
1775                 return -ENOMEM;
1776         }
1777
1778         s->cmdline_idx = 0;
1779         s->cmdline_num = val;
1780         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1781                sizeof(s->map_pid_to_cmdline));
1782         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1783                val * sizeof(*s->map_cmdline_to_pid));
1784
1785         return 0;
1786 }
1787
1788 static int trace_create_savedcmd(void)
1789 {
1790         int ret;
1791
1792         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1793         if (!savedcmd)
1794                 return -ENOMEM;
1795
1796         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1797         if (ret < 0) {
1798                 kfree(savedcmd);
1799                 savedcmd = NULL;
1800                 return -ENOMEM;
1801         }
1802
1803         return 0;
1804 }
1805
1806 int is_tracing_stopped(void)
1807 {
1808         return global_trace.stop_count;
1809 }
1810
1811 /**
1812  * tracing_start - quick start of the tracer
1813  *
1814  * If tracing is enabled but was stopped by tracing_stop,
1815  * this will start the tracer back up.
1816  */
1817 void tracing_start(void)
1818 {
1819         struct ring_buffer *buffer;
1820         unsigned long flags;
1821
1822         if (tracing_disabled)
1823                 return;
1824
1825         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1826         if (--global_trace.stop_count) {
1827                 if (global_trace.stop_count < 0) {
1828                         /* Someone screwed up their debugging */
1829                         WARN_ON_ONCE(1);
1830                         global_trace.stop_count = 0;
1831                 }
1832                 goto out;
1833         }
1834
1835         /* Prevent the buffers from switching */
1836         arch_spin_lock(&global_trace.max_lock);
1837
1838         buffer = global_trace.trace_buffer.buffer;
1839         if (buffer)
1840                 ring_buffer_record_enable(buffer);
1841
1842 #ifdef CONFIG_TRACER_MAX_TRACE
1843         buffer = global_trace.max_buffer.buffer;
1844         if (buffer)
1845                 ring_buffer_record_enable(buffer);
1846 #endif
1847
1848         arch_spin_unlock(&global_trace.max_lock);
1849
1850  out:
1851         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1852 }
1853
1854 static void tracing_start_tr(struct trace_array *tr)
1855 {
1856         struct ring_buffer *buffer;
1857         unsigned long flags;
1858
1859         if (tracing_disabled)
1860                 return;
1861
1862         /* If global, we need to also start the max tracer */
1863         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1864                 return tracing_start();
1865
1866         raw_spin_lock_irqsave(&tr->start_lock, flags);
1867
1868         if (--tr->stop_count) {
1869                 if (tr->stop_count < 0) {
1870                         /* Someone screwed up their debugging */
1871                         WARN_ON_ONCE(1);
1872                         tr->stop_count = 0;
1873                 }
1874                 goto out;
1875         }
1876
1877         buffer = tr->trace_buffer.buffer;
1878         if (buffer)
1879                 ring_buffer_record_enable(buffer);
1880
1881  out:
1882         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1883 }
1884
1885 /**
1886  * tracing_stop - quick stop of the tracer
1887  *
1888  * Light weight way to stop tracing. Use in conjunction with
1889  * tracing_start.
1890  */
1891 void tracing_stop(void)
1892 {
1893         struct ring_buffer *buffer;
1894         unsigned long flags;
1895
1896         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1897         if (global_trace.stop_count++)
1898                 goto out;
1899
1900         /* Prevent the buffers from switching */
1901         arch_spin_lock(&global_trace.max_lock);
1902
1903         buffer = global_trace.trace_buffer.buffer;
1904         if (buffer)
1905                 ring_buffer_record_disable(buffer);
1906
1907 #ifdef CONFIG_TRACER_MAX_TRACE
1908         buffer = global_trace.max_buffer.buffer;
1909         if (buffer)
1910                 ring_buffer_record_disable(buffer);
1911 #endif
1912
1913         arch_spin_unlock(&global_trace.max_lock);
1914
1915  out:
1916         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1917 }
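/*
 * Usage sketch (illustration only): kernel code that wants to freeze
 * the trace buffers while it inspects them brackets the region with
 * the pair above:
 *
 *	tracing_stop();
 *	... read or copy the ring buffer contents ...
 *	tracing_start();
 */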
1918
1919 static void tracing_stop_tr(struct trace_array *tr)
1920 {
1921         struct ring_buffer *buffer;
1922         unsigned long flags;
1923
1924         /* If global, we need to also stop the max tracer */
1925         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1926                 return tracing_stop();
1927
1928         raw_spin_lock_irqsave(&tr->start_lock, flags);
1929         if (tr->stop_count++)
1930                 goto out;
1931
1932         buffer = tr->trace_buffer.buffer;
1933         if (buffer)
1934                 ring_buffer_record_disable(buffer);
1935
1936  out:
1937         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1938 }
1939
1940 static int trace_save_cmdline(struct task_struct *tsk)
1941 {
1942         unsigned tpid, idx;
1943
1944         /* treat recording of idle task as a success */
1945         if (!tsk->pid)
1946                 return 1;
1947
1948         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1949
1950         /*
1951          * It's not the end of the world if we don't get
1952          * the lock, but we also don't want to spin
1953          * nor do we want to disable interrupts,
1954          * so if we miss here, then better luck next time.
1955          */
1956         if (!arch_spin_trylock(&trace_cmdline_lock))
1957                 return 0;
1958
1959         idx = savedcmd->map_pid_to_cmdline[tpid];
1960         if (idx == NO_CMDLINE_MAP) {
1961                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1962
1963                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1964                 savedcmd->cmdline_idx = idx;
1965         }
1966
1967         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1968         set_cmdline(idx, tsk->comm);
1969
1970         arch_spin_unlock(&trace_cmdline_lock);
1971
1972         return 1;
1973 }
1974
1975 static void __trace_find_cmdline(int pid, char comm[])
1976 {
1977         unsigned map;
1978         int tpid;
1979
1980         if (!pid) {
1981                 strcpy(comm, "<idle>");
1982                 return;
1983         }
1984
1985         if (WARN_ON_ONCE(pid < 0)) {
1986                 strcpy(comm, "<XXX>");
1987                 return;
1988         }
1989
1990         tpid = pid & (PID_MAX_DEFAULT - 1);
1991         map = savedcmd->map_pid_to_cmdline[tpid];
1992         if (map != NO_CMDLINE_MAP) {
1993                 tpid = savedcmd->map_cmdline_to_pid[map];
1994                 if (tpid == pid) {
1995                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1996                         return;
1997                 }
1998         }
1999         strcpy(comm, "<...>");
2000 }
2001
2002 void trace_find_cmdline(int pid, char comm[])
2003 {
2004         preempt_disable();
2005         arch_spin_lock(&trace_cmdline_lock);
2006
2007         __trace_find_cmdline(pid, comm);
2008
2009         arch_spin_unlock(&trace_cmdline_lock);
2010         preempt_enable();
2011 }
2012
2013 static int *trace_find_tgid_ptr(int pid)
2014 {
2015         /*
2016          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2017          * if we observe a non-NULL tgid_map then we also observe the correct
2018          * tgid_map_max.
2019          */
2020         int *map = smp_load_acquire(&tgid_map);
2021
2022         if (unlikely(!map || pid > tgid_map_max))
2023                 return NULL;
2024
2025         return &map[pid];
2026 }
2027
2028 int trace_find_tgid(int pid)
2029 {
2030         int *ptr = trace_find_tgid_ptr(pid);
2031
2032         return ptr ? *ptr : 0;
2033 }
2034
2035 static int trace_save_tgid(struct task_struct *tsk)
2036 {
2037         int *ptr;
2038
2039         /* treat recording of idle task as a success */
2040         if (!tsk->pid)
2041                 return 1;
2042
2043         ptr = trace_find_tgid_ptr(tsk->pid);
2044         if (!ptr)
2045                 return 0;
2046
2047         *ptr = tsk->tgid;
2048         return 1;
2049 }
2050
2051 static bool tracing_record_taskinfo_skip(int flags)
2052 {
2053         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2054                 return true;
2055         if (!__this_cpu_read(trace_taskinfo_save))
2056                 return true;
2057         return false;
2058 }
2059
2060 /**
2061  * tracing_record_taskinfo - record the task info of a task
2062  *
2063  * @task  - task to record
2064  * @flags - TRACE_RECORD_CMDLINE for recording comm
2065  *        - TRACE_RECORD_TGID for recording tgid
2066  */
2067 void tracing_record_taskinfo(struct task_struct *task, int flags)
2068 {
2069         bool done;
2070
2071         if (tracing_record_taskinfo_skip(flags))
2072                 return;
2073
2074         /*
2075          * Record as much task information as possible. If some fail, continue
2076          * to try to record the others.
2077          */
2078         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2079         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2080
2081         /* If recording any information failed, retry again soon. */
2082         if (!done)
2083                 return;
2084
2085         __this_cpu_write(trace_taskinfo_save, false);
2086 }
2087
2088 /**
2089  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2090  *
2091  * @prev - previous task during sched_switch
2092  * @next - next task during sched_switch
2093  * @flags - TRACE_RECORD_CMDLINE for recording comm
2094  *          TRACE_RECORD_TGID for recording tgid
2095  */
2096 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2097                                           struct task_struct *next, int flags)
2098 {
2099         bool done;
2100
2101         if (tracing_record_taskinfo_skip(flags))
2102                 return;
2103
2104         /*
2105          * Record as much task information as possible. If some fail, continue
2106          * to try to record the others.
2107          */
2108         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2109         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2110         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2111         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2112
2113         /* If recording any information failed, retry again soon. */
2114         if (!done)
2115                 return;
2116
2117         __this_cpu_write(trace_taskinfo_save, false);
2118 }
2119
2120 /* Helpers to record a specific task information */
2121 void tracing_record_cmdline(struct task_struct *task)
2122 {
2123         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2124 }
2125
2126 void tracing_record_tgid(struct task_struct *task)
2127 {
2128         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2129 }
2130
2131 /*
2132  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2133  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2134  * simplifies those functions and keeps them in sync.
2135  */
2136 enum print_line_t trace_handle_return(struct trace_seq *s)
2137 {
2138         return trace_seq_has_overflowed(s) ?
2139                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2140 }
2141 EXPORT_SYMBOL_GPL(trace_handle_return);
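/*
 * Illustrative sketch (hypothetical callback, not from this file): an
 * event's output routine usually ends with trace_handle_return() so
 * that a trace_seq overflow is reported consistently:
 *
 *	static enum print_line_t sample_trace_output(struct trace_iterator *iter,
 *						     int flags,
 *						     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "sample event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */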
2142
2143 void
2144 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2145                              int pc)
2146 {
2147         struct task_struct *tsk = current;
2148
2149         entry->preempt_count            = pc & 0xff;
2150         entry->pid                      = (tsk) ? tsk->pid : 0;
2151         entry->flags =
2152 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2153                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2154 #else
2155                 TRACE_FLAG_IRQS_NOSUPPORT |
2156 #endif
2157                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2158                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2159                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2160                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2161                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2162 }
2163 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2164
2165 struct ring_buffer_event *
2166 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2167                           int type,
2168                           unsigned long len,
2169                           unsigned long flags, int pc)
2170 {
2171         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2172 }
2173
2174 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2175 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2176 static int trace_buffered_event_ref;
2177
2178 /**
2179  * trace_buffered_event_enable - enable buffering events
2180  *
2181  * When events are being filtered, it is quicker to write the event
2182  * data into a temporary buffer if there is a likely chance that the
2183  * event will not be committed. Discarding a reserved ring buffer
2184  * event is not as fast as committing one, and is much slower than
2185  * copying the data in from the temporary buffer.
2186  *
2187  * When an event is to be filtered, per CPU buffers are allocated to
2188  * write the event data into. If the event is then filtered and
2189  * discarded, it is simply dropped; otherwise, the entire data is
2190  * committed in one shot.
2191  */
2192 void trace_buffered_event_enable(void)
2193 {
2194         struct ring_buffer_event *event;
2195         struct page *page;
2196         int cpu;
2197
2198         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2199
2200         if (trace_buffered_event_ref++)
2201                 return;
2202
2203         for_each_tracing_cpu(cpu) {
2204                 page = alloc_pages_node(cpu_to_node(cpu),
2205                                         GFP_KERNEL | __GFP_NORETRY, 0);
2206                 if (!page)
2207                         goto failed;
2208
2209                 event = page_address(page);
2210                 memset(event, 0, sizeof(*event));
2211
2212                 per_cpu(trace_buffered_event, cpu) = event;
2213
2214                 preempt_disable();
2215                 if (cpu == smp_processor_id() &&
2216                     this_cpu_read(trace_buffered_event) !=
2217                     per_cpu(trace_buffered_event, cpu))
2218                         WARN_ON_ONCE(1);
2219                 preempt_enable();
2220         }
2221
2222         return;
2223  failed:
2224         trace_buffered_event_disable();
2225 }
2226
2227 static void enable_trace_buffered_event(void *data)
2228 {
2229         /* Probably not needed, but do it anyway */
2230         smp_rmb();
2231         this_cpu_dec(trace_buffered_event_cnt);
2232 }
2233
2234 static void disable_trace_buffered_event(void *data)
2235 {
2236         this_cpu_inc(trace_buffered_event_cnt);
2237 }
2238
2239 /**
2240  * trace_buffered_event_disable - disable buffering events
2241  *
2242  * When a filter is removed, it is faster to not use the buffered
2243  * events, and to commit directly into the ring buffer. Free up
2244  * the temp buffers when there are no more users. This requires
2245  * special synchronization with current events.
2246  */
2247 void trace_buffered_event_disable(void)
2248 {
2249         int cpu;
2250
2251         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2252
2253         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2254                 return;
2255
2256         if (--trace_buffered_event_ref)
2257                 return;
2258
2259         preempt_disable();
2260         /* For each CPU, set the buffer as used. */
2261         smp_call_function_many(tracing_buffer_mask,
2262                                disable_trace_buffered_event, NULL, 1);
2263         preempt_enable();
2264
2265         /* Wait for all current users to finish */
2266         synchronize_sched();
2267
2268         for_each_tracing_cpu(cpu) {
2269                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2270                 per_cpu(trace_buffered_event, cpu) = NULL;
2271         }
2272         /*
2273          * Make sure trace_buffered_event is NULL before clearing
2274          * trace_buffered_event_cnt.
2275          */
2276         smp_wmb();
2277
2278         preempt_disable();
2279         /* Do the work on each cpu */
2280         smp_call_function_many(tracing_buffer_mask,
2281                                enable_trace_buffered_event, NULL, 1);
2282         preempt_enable();
2283 }
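/*
 * Sketch of the expected usage (illustration only): each call is made
 * with event_mutex held, and enable/disable stay balanced over the
 * lifetime of whatever needed the buffered events (e.g. a filter):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */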
2284
2285 static struct ring_buffer *temp_buffer;
2286
2287 struct ring_buffer_event *
2288 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2289                           struct trace_event_file *trace_file,
2290                           int type, unsigned long len,
2291                           unsigned long flags, int pc)
2292 {
2293         struct ring_buffer_event *entry;
2294         int val;
2295
2296         *current_rb = trace_file->tr->trace_buffer.buffer;
2297
2298         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2299              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2300             (entry = this_cpu_read(trace_buffered_event))) {
2301                 /* Try to use the per cpu buffer first */
2302                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2303                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2304                         trace_event_setup(entry, type, flags, pc);
2305                         entry->array[0] = len;
2306                         return entry;
2307                 }
2308                 this_cpu_dec(trace_buffered_event_cnt);
2309         }
2310
2311         entry = __trace_buffer_lock_reserve(*current_rb,
2312                                             type, len, flags, pc);
2313         /*
2314          * If tracing is off, but we have triggers enabled,
2315          * we still need to look at the event data. Use the temp_buffer
2316          * to store the trace event for the trigger to use. It's recursion
2317          * safe and will not be recorded anywhere.
2318          */
2319         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2320                 *current_rb = temp_buffer;
2321                 entry = __trace_buffer_lock_reserve(*current_rb,
2322                                                     type, len, flags, pc);
2323         }
2324         return entry;
2325 }
2326 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2327
2328 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2329 static DEFINE_MUTEX(tracepoint_printk_mutex);
2330
2331 static void output_printk(struct trace_event_buffer *fbuffer)
2332 {
2333         struct trace_event_call *event_call;
2334         struct trace_event *event;
2335         unsigned long flags;
2336         struct trace_iterator *iter = tracepoint_print_iter;
2337
2338         /* We should never get here if iter is NULL */
2339         if (WARN_ON_ONCE(!iter))
2340                 return;
2341
2342         event_call = fbuffer->trace_file->event_call;
2343         if (!event_call || !event_call->event.funcs ||
2344             !event_call->event.funcs->trace)
2345                 return;
2346
2347         event = &fbuffer->trace_file->event_call->event;
2348
2349         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2350         trace_seq_init(&iter->seq);
2351         iter->ent = fbuffer->entry;
2352         event_call->event.funcs->trace(iter, 0, event);
2353         trace_seq_putc(&iter->seq, 0);
2354         printk("%s", iter->seq.buffer);
2355
2356         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2357 }
2358
2359 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2360                              void __user *buffer, size_t *lenp,
2361                              loff_t *ppos)
2362 {
2363         int save_tracepoint_printk;
2364         int ret;
2365
2366         mutex_lock(&tracepoint_printk_mutex);
2367         save_tracepoint_printk = tracepoint_printk;
2368
2369         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2370
2371         /*
2372          * This will force exiting early, as tracepoint_printk
2373          * is always zero when tracepoint_print_iter is not allocated.
2374          */
2375         if (!tracepoint_print_iter)
2376                 tracepoint_printk = 0;
2377
2378         if (save_tracepoint_printk == tracepoint_printk)
2379                 goto out;
2380
2381         if (tracepoint_printk)
2382                 static_key_enable(&tracepoint_printk_key.key);
2383         else
2384                 static_key_disable(&tracepoint_printk_key.key);
2385
2386  out:
2387         mutex_unlock(&tracepoint_printk_mutex);
2388
2389         return ret;
2390 }
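/*
 * Usage note (illustration, assuming the usual sysctl wiring): this
 * handler backs the kernel.tracepoint_printk sysctl, so the static key
 * is normally flipped from userspace, e.g.
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * while the tp_printk boot parameter covers the early-boot case.
 */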
2391
2392 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2393 {
2394         if (static_key_false(&tracepoint_printk_key.key))
2395                 output_printk(fbuffer);
2396
2397         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2398                                     fbuffer->event, fbuffer->entry,
2399                                     fbuffer->flags, fbuffer->pc);
2400 }
2401 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2402
2403 /*
2404  * Skip 3:
2405  *
2406  *   trace_buffer_unlock_commit_regs()
2407  *   trace_event_buffer_commit()
2408  *   trace_event_raw_event_xxx()
2409  */
2410 # define STACK_SKIP 3
2411
2412 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2413                                      struct ring_buffer *buffer,
2414                                      struct ring_buffer_event *event,
2415                                      unsigned long flags, int pc,
2416                                      struct pt_regs *regs)
2417 {
2418         __buffer_unlock_commit(buffer, event);
2419
2420         /*
2421          * If regs is not set, then skip the necessary functions.
2422          * Note, we can still get here via blktrace, wakeup tracer
2423          * and mmiotrace, but that's ok if they lose a function or
2424          * two. They are not that meaningful.
2425          */
2426         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2427         ftrace_trace_userstack(tr, buffer, flags, pc);
2428 }
2429
2430 /*
2431  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2432  */
2433 void
2434 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2435                                    struct ring_buffer_event *event)
2436 {
2437         __buffer_unlock_commit(buffer, event);
2438 }
2439
2440 static void
2441 trace_process_export(struct trace_export *export,
2442                struct ring_buffer_event *event)
2443 {
2444         struct trace_entry *entry;
2445         unsigned int size = 0;
2446
2447         entry = ring_buffer_event_data(event);
2448         size = ring_buffer_event_length(event);
2449         export->write(export, entry, size);
2450 }
2451
2452 static DEFINE_MUTEX(ftrace_export_lock);
2453
2454 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2455
2456 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2457
2458 static inline void ftrace_exports_enable(void)
2459 {
2460         static_branch_enable(&ftrace_exports_enabled);
2461 }
2462
2463 static inline void ftrace_exports_disable(void)
2464 {
2465         static_branch_disable(&ftrace_exports_enabled);
2466 }
2467
2468 void ftrace_exports(struct ring_buffer_event *event)
2469 {
2470         struct trace_export *export;
2471
2472         preempt_disable_notrace();
2473
2474         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2475         while (export) {
2476                 trace_process_export(export, event);
2477                 export = rcu_dereference_raw_notrace(export->next);
2478         }
2479
2480         preempt_enable_notrace();
2481 }
2482
2483 static inline void
2484 add_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486         rcu_assign_pointer(export->next, *list);
2487         /*
2488          * We are inserting export into the list, but another
2489          * CPU might be walking that list. We need to make sure
2490          * the export->next pointer is valid before another CPU sees
2491          * the export pointer inserted into the list.
2492          */
2493         rcu_assign_pointer(*list, export);
2494 }
2495
2496 static inline int
2497 rm_trace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499         struct trace_export **p;
2500
2501         for (p = list; *p != NULL; p = &(*p)->next)
2502                 if (*p == export)
2503                         break;
2504
2505         if (*p != export)
2506                 return -1;
2507
2508         rcu_assign_pointer(*p, (*p)->next);
2509
2510         return 0;
2511 }
2512
2513 static inline void
2514 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2515 {
2516         if (*list == NULL)
2517                 ftrace_exports_enable();
2518
2519         add_trace_export(list, export);
2520 }
2521
2522 static inline int
2523 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2524 {
2525         int ret;
2526
2527         ret = rm_trace_export(list, export);
2528         if (*list == NULL)
2529                 ftrace_exports_disable();
2530
2531         return ret;
2532 }
2533
2534 int register_ftrace_export(struct trace_export *export)
2535 {
2536         if (WARN_ON_ONCE(!export->write))
2537                 return -1;
2538
2539         mutex_lock(&ftrace_export_lock);
2540
2541         add_ftrace_export(&ftrace_exports_list, export);
2542
2543         mutex_unlock(&ftrace_export_lock);
2544
2545         return 0;
2546 }
2547 EXPORT_SYMBOL_GPL(register_ftrace_export);
2548
2549 int unregister_ftrace_export(struct trace_export *export)
2550 {
2551         int ret;
2552
2553         mutex_lock(&ftrace_export_lock);
2554
2555         ret = rm_ftrace_export(&ftrace_exports_list, export);
2556
2557         mutex_unlock(&ftrace_export_lock);
2558
2559         return ret;
2560 }
2561 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
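/*
 * Illustrative sketch (hypothetical export, not from this file): a
 * consumer that wants to siphon off function trace events provides a
 * write() callback and registers/unregisters a struct trace_export:
 *
 *	static void sample_export_write(struct trace_export *export,
 *					const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry bytes somewhere else
 *	}
 *
 *	static struct trace_export sample_export = {
 *		.write	= sample_export_write,
 *	};
 *
 *	register_ftrace_export(&sample_export);
 *	...
 *	unregister_ftrace_export(&sample_export);
 */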
2562
2563 void
2564 trace_function(struct trace_array *tr,
2565                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2566                int pc)
2567 {
2568         struct trace_event_call *call = &event_function;
2569         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2570         struct ring_buffer_event *event;
2571         struct ftrace_entry *entry;
2572
2573         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2574                                             flags, pc);
2575         if (!event)
2576                 return;
2577         entry   = ring_buffer_event_data(event);
2578         entry->ip                       = ip;
2579         entry->parent_ip                = parent_ip;
2580
2581         if (!call_filter_check_discard(call, entry, buffer, event)) {
2582                 if (static_branch_unlikely(&ftrace_exports_enabled))
2583                         ftrace_exports(event);
2584                 __buffer_unlock_commit(buffer, event);
2585         }
2586 }
2587
2588 #ifdef CONFIG_STACKTRACE
2589
2590 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2591 struct ftrace_stack {
2592         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2593 };
2594
2595 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2596 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2597
2598 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2599                                  unsigned long flags,
2600                                  int skip, int pc, struct pt_regs *regs)
2601 {
2602         struct trace_event_call *call = &event_kernel_stack;
2603         struct ring_buffer_event *event;
2604         struct stack_entry *entry;
2605         struct stack_trace trace;
2606         int use_stack;
2607         int size = FTRACE_STACK_ENTRIES;
2608
2609         trace.nr_entries        = 0;
2610         trace.skip              = skip;
2611
2612         /*
2613          * Add one, for this function and the call to save_stack_trace().
2614          * If regs is set, then these functions will not be in the way.
2615          */
2616 #ifndef CONFIG_UNWINDER_ORC
2617         if (!regs)
2618                 trace.skip++;
2619 #endif
2620
2621         /*
2622          * Since events can happen in NMIs, there's no safe way to
2623          * use the per CPU ftrace_stacks. We reserve it and if an interrupt
2624          * or NMI comes in, it will just have to use the default
2625          * FTRACE_STACK_ENTRIES.
2626          */
2627         preempt_disable_notrace();
2628
2629         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2630         /*
2631          * We don't need any atomic variables, just a barrier.
2632          * If an interrupt comes in, we don't care, because it would
2633          * have exited and put the counter back to what we want.
2634          * We just need a barrier to keep gcc from moving things
2635          * around.
2636          */
2637         barrier();
2638         if (use_stack == 1) {
2639                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2640                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2641
2642                 if (regs)
2643                         save_stack_trace_regs(regs, &trace);
2644                 else
2645                         save_stack_trace(&trace);
2646
2647                 if (trace.nr_entries > size)
2648                         size = trace.nr_entries;
2649         } else
2650                 /* From now on, use_stack is a boolean */
2651                 use_stack = 0;
2652
2653         size *= sizeof(unsigned long);
2654
2655         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2656                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2657                                     flags, pc);
2658         if (!event)
2659                 goto out;
2660         entry = ring_buffer_event_data(event);
2661
2662         memset(&entry->caller, 0, size);
2663
2664         if (use_stack)
2665                 memcpy(&entry->caller, trace.entries,
2666                        trace.nr_entries * sizeof(unsigned long));
2667         else {
2668                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2669                 trace.entries           = entry->caller;
2670                 if (regs)
2671                         save_stack_trace_regs(regs, &trace);
2672                 else
2673                         save_stack_trace(&trace);
2674         }
2675
2676         entry->size = trace.nr_entries;
2677
2678         if (!call_filter_check_discard(call, entry, buffer, event))
2679                 __buffer_unlock_commit(buffer, event);
2680
2681  out:
2682         /* Again, don't let gcc optimize things here */
2683         barrier();
2684         __this_cpu_dec(ftrace_stack_reserve);
2685         preempt_enable_notrace();
2686
2687 }
2688
2689 static inline void ftrace_trace_stack(struct trace_array *tr,
2690                                       struct ring_buffer *buffer,
2691                                       unsigned long flags,
2692                                       int skip, int pc, struct pt_regs *regs)
2693 {
2694         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2695                 return;
2696
2697         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2698 }
2699
2700 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2701                    int pc)
2702 {
2703         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2704
2705         if (rcu_is_watching()) {
2706                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707                 return;
2708         }
2709
2710         /*
2711          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2712          * but if the above rcu_is_watching() failed, then the NMI
2713          * triggered someplace critical, and rcu_irq_enter() should
2714          * not be called from NMI.
2715          */
2716         if (unlikely(in_nmi()))
2717                 return;
2718
2719         rcu_irq_enter_irqson();
2720         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2721         rcu_irq_exit_irqson();
2722 }
2723
2724 /**
2725  * trace_dump_stack - record a stack back trace in the trace buffer
2726  * @skip: Number of functions to skip (helper handlers)
2727  */
2728 void trace_dump_stack(int skip)
2729 {
2730         unsigned long flags;
2731
2732         if (tracing_disabled || tracing_selftest_running)
2733                 return;
2734
2735         local_save_flags(flags);
2736
2737 #ifndef CONFIG_UNWINDER_ORC
2738         /* Skip 1 to skip this function. */
2739         skip++;
2740 #endif
2741         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2742                              flags, skip, preempt_count(), NULL);
2743 }
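/*
 * Usage sketch (illustration only): a debugging site that wants the
 * backtrace in the trace buffer rather than in dmesg can simply call
 *
 *	trace_dump_stack(0);
 *
 * passing a non-zero skip count to drop its own helper frames.
 */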
2744
2745 static DEFINE_PER_CPU(int, user_stack_count);
2746
2747 void
2748 ftrace_trace_userstack(struct trace_array *tr,
2749                        struct ring_buffer *buffer, unsigned long flags, int pc)
2750 {
2751         struct trace_event_call *call = &event_user_stack;
2752         struct ring_buffer_event *event;
2753         struct userstack_entry *entry;
2754         struct stack_trace trace;
2755
2756         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2757                 return;
2758
2759         /*
2760          * NMIs cannot handle page faults, even with fixups.
2761          * Saving the user stack can (and often does) fault.
2762          */
2763         if (unlikely(in_nmi()))
2764                 return;
2765
2766         /*
2767          * prevent recursion, since the user stack tracing may
2768          * trigger other kernel events.
2769          */
2770         preempt_disable();
2771         if (__this_cpu_read(user_stack_count))
2772                 goto out;
2773
2774         __this_cpu_inc(user_stack_count);
2775
2776         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2777                                             sizeof(*entry), flags, pc);
2778         if (!event)
2779                 goto out_drop_count;
2780         entry   = ring_buffer_event_data(event);
2781
2782         entry->tgid             = current->tgid;
2783         memset(&entry->caller, 0, sizeof(entry->caller));
2784
2785         trace.nr_entries        = 0;
2786         trace.max_entries       = FTRACE_STACK_ENTRIES;
2787         trace.skip              = 0;
2788         trace.entries           = entry->caller;
2789
2790         save_stack_trace_user(&trace);
2791         if (!call_filter_check_discard(call, entry, buffer, event))
2792                 __buffer_unlock_commit(buffer, event);
2793
2794  out_drop_count:
2795         __this_cpu_dec(user_stack_count);
2796  out:
2797         preempt_enable();
2798 }
2799
2800 #ifdef UNUSED
2801 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2802 {
2803         ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
2804 }
2805 #endif /* UNUSED */
2806
2807 #endif /* CONFIG_STACKTRACE */
2808
2809 /* created for use with alloc_percpu */
2810 struct trace_buffer_struct {
2811         int nesting;
2812         char buffer[4][TRACE_BUF_SIZE];
2813 };
2814
2815 static struct trace_buffer_struct *trace_percpu_buffer;
2816
2817 /*
2818  * This allows for lockless recording.  If we're nested too deeply, then
2819  * this returns NULL.
2820  */
2821 static char *get_trace_buf(void)
2822 {
2823         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2824
2825         if (!buffer || buffer->nesting >= 4)
2826                 return NULL;
2827
2828         buffer->nesting++;
2829
2830         /* Interrupts must see nesting incremented before we use the buffer */
2831         barrier();
2832         return &buffer->buffer[buffer->nesting - 1][0];
2833 }
2834
2835 static void put_trace_buf(void)
2836 {
2837         /* Don't let the decrement of nesting leak before this */
2838         barrier();
2839         this_cpu_dec(trace_percpu_buffer->nesting);
2840 }
2841
2842 static int alloc_percpu_trace_buffer(void)
2843 {
2844         struct trace_buffer_struct *buffers;
2845
2846         buffers = alloc_percpu(struct trace_buffer_struct);
2847         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2848                 return -ENOMEM;
2849
2850         trace_percpu_buffer = buffers;
2851         return 0;
2852 }
2853
2854 static int buffers_allocated;
2855
2856 void trace_printk_init_buffers(void)
2857 {
2858         if (buffers_allocated)
2859                 return;
2860
2861         if (alloc_percpu_trace_buffer())
2862                 return;
2863
2864         /* trace_printk() is for debug use only. Don't use it in production. */
2865
2866         pr_warn("\n");
2867         pr_warn("**********************************************************\n");
2868         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2869         pr_warn("**                                                      **\n");
2870         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2871         pr_warn("**                                                      **\n");
2872         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2873         pr_warn("** unsafe for production use.                           **\n");
2874         pr_warn("**                                                      **\n");
2875         pr_warn("** If you see this message and you are not debugging    **\n");
2876         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2877         pr_warn("**                                                      **\n");
2878         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2879         pr_warn("**********************************************************\n");
2880
2881         /* Expand the buffers to set size */
2882         tracing_update_buffers();
2883
2884         buffers_allocated = 1;
2885
2886         /*
2887          * trace_printk_init_buffers() can be called by modules.
2888          * If that happens, then we need to start cmdline recording
2889          * directly here. If the global_trace.buffer is already
2890          * allocated here, then this was called by module code.
2891          */
2892         if (global_trace.trace_buffer.buffer)
2893                 tracing_start_cmdline_record();
2894 }
2895
2896 void trace_printk_start_comm(void)
2897 {
2898         /* Start tracing comms if trace printk is set */
2899         if (!buffers_allocated)
2900                 return;
2901         tracing_start_cmdline_record();
2902 }
2903
2904 static void trace_printk_start_stop_comm(int enabled)
2905 {
2906         if (!buffers_allocated)
2907                 return;
2908
2909         if (enabled)
2910                 tracing_start_cmdline_record();
2911         else
2912                 tracing_stop_cmdline_record();
2913 }
2914
2915 /**
2916  * trace_vbprintk - write binary msg to tracing buffer
2917  *
2918  */
2919 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2920 {
2921         struct trace_event_call *call = &event_bprint;
2922         struct ring_buffer_event *event;
2923         struct ring_buffer *buffer;
2924         struct trace_array *tr = &global_trace;
2925         struct bprint_entry *entry;
2926         unsigned long flags;
2927         char *tbuffer;
2928         int len = 0, size, pc;
2929
2930         if (unlikely(tracing_selftest_running || tracing_disabled))
2931                 return 0;
2932
2933         /* Don't pollute graph traces with trace_vprintk internals */
2934         pause_graph_tracing();
2935
2936         pc = preempt_count();
2937         preempt_disable_notrace();
2938
2939         tbuffer = get_trace_buf();
2940         if (!tbuffer) {
2941                 len = 0;
2942                 goto out_nobuffer;
2943         }
2944
2945         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2946
2947         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2948                 goto out;
2949
2950         local_save_flags(flags);
2951         size = sizeof(*entry) + sizeof(u32) * len;
2952         buffer = tr->trace_buffer.buffer;
2953         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2954                                             flags, pc);
2955         if (!event)
2956                 goto out;
2957         entry = ring_buffer_event_data(event);
2958         entry->ip                       = ip;
2959         entry->fmt                      = fmt;
2960
2961         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2962         if (!call_filter_check_discard(call, entry, buffer, event)) {
2963                 __buffer_unlock_commit(buffer, event);
2964                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2965         }
2966
2967 out:
2968         put_trace_buf();
2969
2970 out_nobuffer:
2971         preempt_enable_notrace();
2972         unpause_graph_tracing();
2973
2974         return len;
2975 }
2976 EXPORT_SYMBOL_GPL(trace_vbprintk);
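/*
 * Typical caller (sketch): trace_printk() from linux/kernel.h is the
 * usual front end for this path; a constant format string with
 * arguments ends up in the binary (bprintk) variant above, e.g.
 *
 *	trace_printk("entered %s, count=%d\n", __func__, count);
 *
 * where "count" stands in for whatever the caller wants logged.
 */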
2977
2978 __printf(3, 0)
2979 static int
2980 __trace_array_vprintk(struct ring_buffer *buffer,
2981                       unsigned long ip, const char *fmt, va_list args)
2982 {
2983         struct trace_event_call *call = &event_print;
2984         struct ring_buffer_event *event;
2985         int len = 0, size, pc;
2986         struct print_entry *entry;
2987         unsigned long flags;
2988         char *tbuffer;
2989
2990         if (tracing_disabled || tracing_selftest_running)
2991                 return 0;
2992
2993         /* Don't pollute graph traces with trace_vprintk internals */
2994         pause_graph_tracing();
2995
2996         pc = preempt_count();
2997         preempt_disable_notrace();
2998
2999
3000         tbuffer = get_trace_buf();
3001         if (!tbuffer) {
3002                 len = 0;
3003                 goto out_nobuffer;
3004         }
3005
3006         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3007
3008         local_save_flags(flags);
3009         size = sizeof(*entry) + len + 1;
3010         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3011                                             flags, pc);
3012         if (!event)
3013                 goto out;
3014         entry = ring_buffer_event_data(event);
3015         entry->ip = ip;
3016
3017         memcpy(&entry->buf, tbuffer, len + 1);
3018         if (!call_filter_check_discard(call, entry, buffer, event)) {
3019                 __buffer_unlock_commit(buffer, event);
3020                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3021         }
3022
3023 out:
3024         put_trace_buf();
3025
3026 out_nobuffer:
3027         preempt_enable_notrace();
3028         unpause_graph_tracing();
3029
3030         return len;
3031 }
3032
3033 __printf(3, 0)
3034 int trace_array_vprintk(struct trace_array *tr,
3035                         unsigned long ip, const char *fmt, va_list args)
3036 {
3037         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3038 }
3039
3040 __printf(3, 0)
3041 int trace_array_printk(struct trace_array *tr,
3042                        unsigned long ip, const char *fmt, ...)
3043 {
3044         int ret;
3045         va_list ap;
3046
3047         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3048                 return 0;
3049
3050         if (!tr)
3051                 return -ENOENT;
3052
3053         va_start(ap, fmt);
3054         ret = trace_array_vprintk(tr, ip, fmt, ap);
3055         va_end(ap);
3056         return ret;
3057 }
3058
3059 __printf(3, 4)
3060 int trace_array_printk_buf(struct ring_buffer *buffer,
3061                            unsigned long ip, const char *fmt, ...)
3062 {
3063         int ret;
3064         va_list ap;
3065
3066         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3067                 return 0;
3068
3069         va_start(ap, fmt);
3070         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3071         va_end(ap);
3072         return ret;
3073 }
3074
3075 __printf(2, 0)
3076 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3077 {
3078         return trace_array_vprintk(&global_trace, ip, fmt, args);
3079 }
3080 EXPORT_SYMBOL_GPL(trace_vprintk);
3081
3082 static void trace_iterator_increment(struct trace_iterator *iter)
3083 {
3084         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3085
3086         iter->idx++;
3087         if (buf_iter)
3088                 ring_buffer_read(buf_iter, NULL);
3089 }
3090
3091 static struct trace_entry *
3092 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3093                 unsigned long *lost_events)
3094 {
3095         struct ring_buffer_event *event;
3096         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3097
3098         if (buf_iter)
3099                 event = ring_buffer_iter_peek(buf_iter, ts);
3100         else
3101                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3102                                          lost_events);
3103
3104         if (event) {
3105                 iter->ent_size = ring_buffer_event_length(event);
3106                 return ring_buffer_event_data(event);
3107         }
3108         iter->ent_size = 0;
3109         return NULL;
3110 }
3111
3112 static struct trace_entry *
3113 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3114                   unsigned long *missing_events, u64 *ent_ts)
3115 {
3116         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3117         struct trace_entry *ent, *next = NULL;
3118         unsigned long lost_events = 0, next_lost = 0;
3119         int cpu_file = iter->cpu_file;
3120         u64 next_ts = 0, ts;
3121         int next_cpu = -1;
3122         int next_size = 0;
3123         int cpu;
3124
3125         /*
3126          * If we are in a per_cpu trace file, don't bother iterating over
3127          * all CPUs; peek at the requested CPU directly.
3128          */
3129         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3130                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3131                         return NULL;
3132                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3133                 if (ent_cpu)
3134                         *ent_cpu = cpu_file;
3135
3136                 return ent;
3137         }
3138
3139         for_each_tracing_cpu(cpu) {
3140
3141                 if (ring_buffer_empty_cpu(buffer, cpu))
3142                         continue;
3143
3144                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3145
3146                 /*
3147                  * Pick the entry with the smallest timestamp:
3148                  */
3149                 if (ent && (!next || ts < next_ts)) {
3150                         next = ent;
3151                         next_cpu = cpu;
3152                         next_ts = ts;
3153                         next_lost = lost_events;
3154                         next_size = iter->ent_size;
3155                 }
3156         }
3157
3158         iter->ent_size = next_size;
3159
3160         if (ent_cpu)
3161                 *ent_cpu = next_cpu;
3162
3163         if (ent_ts)
3164                 *ent_ts = next_ts;
3165
3166         if (missing_events)
3167                 *missing_events = next_lost;
3168
3169         return next;
3170 }
3171
3172 /* Find the next real entry, without updating the iterator itself */
3173 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3174                                           int *ent_cpu, u64 *ent_ts)
3175 {
3176         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3177 }
3178
3179 /* Find the next real entry, and increment the iterator to the next entry */
3180 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3181 {
3182         iter->ent = __find_next_entry(iter, &iter->cpu,
3183                                       &iter->lost_events, &iter->ts);
3184
3185         if (iter->ent)
3186                 trace_iterator_increment(iter);
3187
3188         return iter->ent ? iter : NULL;
3189 }
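/*
 * Iteration sketch (illustration only): consumers walk the merged,
 * timestamp-ordered stream by repeatedly advancing the iterator:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		... process iter->ent from iter->cpu at iter->ts ...
 *	}
 */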
3190
3191 static void trace_consume(struct trace_iterator *iter)
3192 {
3193         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3194                             &iter->lost_events);
3195 }
3196
3197 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3198 {
3199         struct trace_iterator *iter = m->private;
3200         int i = (int)*pos;
3201         void *ent;
3202
3203         WARN_ON_ONCE(iter->leftover);
3204
3205         (*pos)++;
3206
3207         /* can't go backwards */
3208         if (iter->idx > i)
3209                 return NULL;
3210
3211         if (iter->idx < 0)
3212                 ent = trace_find_next_entry_inc(iter);
3213         else
3214                 ent = iter;
3215
3216         while (ent && iter->idx < i)
3217                 ent = trace_find_next_entry_inc(iter);
3218
3219         iter->pos = *pos;
3220
3221         return ent;
3222 }
3223
3224 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3225 {
3226         struct ring_buffer_event *event;
3227         struct ring_buffer_iter *buf_iter;
3228         unsigned long entries = 0;
3229         u64 ts;
3230
3231         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3232
3233         buf_iter = trace_buffer_iter(iter, cpu);
3234         if (!buf_iter)
3235                 return;
3236
3237         ring_buffer_iter_reset(buf_iter);
3238
3239         /*
3240          * With the max latency tracers, we could have the case
3241          * that a reset never took place on a CPU. This is evident
3242          * from the timestamps being before the start of the buffer.
3243          */
3244         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3245                 if (ts >= iter->trace_buffer->time_start)
3246                         break;
3247                 entries++;
3248                 ring_buffer_read(buf_iter, NULL);
3249         }
3250
3251         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3252 }
3253
3254 /*
3255  * The current tracer is copied to avoid taking a global lock
3256  * all around.
3257  */
3258 static void *s_start(struct seq_file *m, loff_t *pos)
3259 {
3260         struct trace_iterator *iter = m->private;
3261         struct trace_array *tr = iter->tr;
3262         int cpu_file = iter->cpu_file;
3263         void *p = NULL;
3264         loff_t l = 0;
3265         int cpu;
3266
3267         /*
3268          * Copy the tracer to avoid using a global lock all around.
3269          * iter->trace is a copy of current_trace; the pointer to the
3270          * name may be used instead of a strcmp(), as iter->trace->name
3271          * will point to the same string as current_trace->name.
3272          */
3273         mutex_lock(&trace_types_lock);
3274         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3275                 *iter->trace = *tr->current_trace;
3276         mutex_unlock(&trace_types_lock);
3277
3278 #ifdef CONFIG_TRACER_MAX_TRACE
3279         if (iter->snapshot && iter->trace->use_max_tr)
3280                 return ERR_PTR(-EBUSY);
3281 #endif
3282
3283         if (*pos != iter->pos) {
3284                 iter->ent = NULL;
3285                 iter->cpu = 0;
3286                 iter->idx = -1;
3287
3288                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3289                         for_each_tracing_cpu(cpu)
3290                                 tracing_iter_reset(iter, cpu);
3291                 } else
3292                         tracing_iter_reset(iter, cpu_file);
3293
3294                 iter->leftover = 0;
3295                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3296                         ;
3297
3298         } else {
3299                 /*
3300                  * If we overflowed the seq_file before, then we want
3301                  * to just reuse the trace_seq buffer again.
3302                  */
3303                 if (iter->leftover)
3304                         p = iter;
3305                 else {
3306                         l = *pos - 1;
3307                         p = s_next(m, p, &l);
3308                 }
3309         }
3310
3311         trace_event_read_lock();
3312         trace_access_lock(cpu_file);
3313         return p;
3314 }
3315
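/* seq_file ->stop() callback: release the locks taken in s_start(). */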
3316 static void s_stop(struct seq_file *m, void *p)
3317 {
3318         struct trace_iterator *iter = m->private;
3319
3320 #ifdef CONFIG_TRACER_MAX_TRACE
3321         if (iter->snapshot && iter->trace->use_max_tr)
3322                 return;
3323 #endif
3324
3325         trace_access_unlock(iter->cpu_file);
3326         trace_event_read_unlock();
3327 }
3328
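/*
 * Count the entries in the per-CPU buffers.  @entries is the number of
 * entries still readable; @total also includes entries that were lost
 * to ring buffer overruns.
 */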
3329 static void
3330 get_total_entries(struct trace_buffer *buf,
3331                   unsigned long *total, unsigned long *entries)
3332 {
3333         unsigned long count;
3334         int cpu;
3335
3336         *total = 0;
3337         *entries = 0;
3338
3339         for_each_tracing_cpu(cpu) {
3340                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3341                 /*
3342                  * If this buffer has skipped entries, then we hold all
3343                  * entries for the trace and we need to ignore the
3344                  * ones before the time stamp.
3345                  */
3346                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3347                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3348                         /* total is the same as the entries */
3349                         *total += count;
3350                 } else
3351                         *total += count +
3352                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3353                 *entries += count;
3354         }
3355 }
3356
3357 static void print_lat_help_header(struct seq_file *m)
3358 {
3359         seq_puts(m, "#                  _------=> CPU#            \n"
3360                     "#                 / _-----=> irqs-off        \n"
3361                     "#                | / _----=> need-resched    \n"
3362                     "#                || / _---=> hardirq/softirq \n"
3363                     "#                ||| / _--=> preempt-depth   \n"
3364                     "#                |||| /     delay            \n"
3365                     "#  cmd     pid   ||||| time  |   caller      \n"
3366                     "#     \\   /      |||||  \\    |   /         \n");
3367 }
3368
3369 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3370 {
3371         unsigned long total;
3372         unsigned long entries;
3373
3374         get_total_entries(buf, &total, &entries);
3375         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3376                    entries, total, num_online_cpus());
3377         seq_puts(m, "#\n");
3378 }
3379
3380 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3381                                    unsigned int flags)
3382 {
3383         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3384
3385         print_event_info(buf, m);
3386
3387         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3388         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3389 }
3390
3391 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3392                                        unsigned int flags)
3393 {
3394         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3395         const char tgid_space[] = "          ";
3396         const char space[] = "  ";
3397
3398         print_event_info(buf, m);
3399
3400         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3401                    tgid ? tgid_space : space);
3402         seq_printf(m, "#                          %s / _----=> need-resched\n",
3403                    tgid ? tgid_space : space);
3404         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3405                    tgid ? tgid_space : space);
3406         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3407                    tgid ? tgid_space : space);
3408         seq_printf(m, "#                          %s||| /     delay\n",
3409                    tgid ? tgid_space : space);
3410         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3411                    tgid ? "   TGID   " : space);
3412         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3413                    tgid ? "     |    " : space);
3414 }
3415
3416 void
3417 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3418 {
3419         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3420         struct trace_buffer *buf = iter->trace_buffer;
3421         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3422         struct tracer *type = iter->trace;
3423         unsigned long entries;
3424         unsigned long total;
3425         const char *name = "preemption";
3426
3427         name = type->name;
3428
3429         get_total_entries(buf, &total, &entries);
3430
3431         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3432                    name, UTS_RELEASE);
3433         seq_puts(m, "# -----------------------------------"
3434                  "---------------------------------\n");
3435         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3436                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3437                    nsecs_to_usecs(data->saved_latency),
3438                    entries,
3439                    total,
3440                    buf->cpu,
3441 #if defined(CONFIG_PREEMPT_NONE)
3442                    "server",
3443 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3444                    "desktop",
3445 #elif defined(CONFIG_PREEMPT)
3446                    "preempt",
3447 #else
3448                    "unknown",
3449 #endif
3450                    /* These are reserved for later use */
3451                    0, 0, 0, 0);
3452 #ifdef CONFIG_SMP
3453         seq_printf(m, " #P:%d)\n", num_online_cpus());
3454 #else
3455         seq_puts(m, ")\n");
3456 #endif
3457         seq_puts(m, "#    -----------------\n");
3458         seq_printf(m, "#    | task: %.16s-%d "
3459                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3460                    data->comm, data->pid,
3461                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3462                    data->policy, data->rt_priority);
3463         seq_puts(m, "#    -----------------\n");
3464
3465         if (data->critical_start) {
3466                 seq_puts(m, "#  => started at: ");
3467                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3468                 trace_print_seq(m, &iter->seq);
3469                 seq_puts(m, "\n#  => ended at:   ");
3470                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3471                 trace_print_seq(m, &iter->seq);
3472                 seq_puts(m, "\n#\n");
3473         }
3474
3475         seq_puts(m, "#\n");
3476 }
3477
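/*
 * If the annotate option is set and the buffers had overruns, print a
 * "CPU buffer started" marker the first time an entry from a given CPU
 * is printed, so the reader knows where that CPU's data begins.
 */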
3478 static void test_cpu_buff_start(struct trace_iterator *iter)
3479 {
3480         struct trace_seq *s = &iter->seq;
3481         struct trace_array *tr = iter->tr;
3482
3483         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3484                 return;
3485
3486         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3487                 return;
3488
3489         if (cpumask_available(iter->started) &&
3490             cpumask_test_cpu(iter->cpu, iter->started))
3491                 return;
3492
3493         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3494                 return;
3495
3496         if (cpumask_available(iter->started))
3497                 cpumask_set_cpu(iter->cpu, iter->started);
3498
3499         /* Don't print started cpu buffer for the first entry of the trace */
3500         if (iter->idx > 1)
3501                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3502                                 iter->cpu);
3503 }
3504
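/* Default human-readable output for a single trace entry. */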
3505 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3506 {
3507         struct trace_array *tr = iter->tr;
3508         struct trace_seq *s = &iter->seq;
3509         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3510         struct trace_entry *entry;
3511         struct trace_event *event;
3512
3513         entry = iter->ent;
3514
3515         test_cpu_buff_start(iter);
3516
3517         event = ftrace_find_event(entry->type);
3518
3519         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3520                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3521                         trace_print_lat_context(iter);
3522                 else
3523                         trace_print_context(iter);
3524         }
3525
3526         if (trace_seq_has_overflowed(s))
3527                 return TRACE_TYPE_PARTIAL_LINE;
3528
3529         if (event)
3530                 return event->funcs->trace(iter, sym_flags, event);
3531
3532         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3533
3534         return trace_handle_return(s);
3535 }
3536
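/* Output for a single entry when the "raw" trace option is set. */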
3537 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3538 {
3539         struct trace_array *tr = iter->tr;
3540         struct trace_seq *s = &iter->seq;
3541         struct trace_entry *entry;
3542         struct trace_event *event;
3543
3544         entry = iter->ent;
3545
3546         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3547                 trace_seq_printf(s, "%d %d %llu ",
3548                                  entry->pid, iter->cpu, iter->ts);
3549
3550         if (trace_seq_has_overflowed(s))
3551                 return TRACE_TYPE_PARTIAL_LINE;
3552
3553         event = ftrace_find_event(entry->type);
3554         if (event)
3555                 return event->funcs->raw(iter, 0, event);
3556
3557         trace_seq_printf(s, "%d ?\n", entry->type);
3558
3559         return trace_handle_return(s);
3560 }
3561
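/* Output for a single entry when the "hex" trace option is set. */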
3562 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3563 {
3564         struct trace_array *tr = iter->tr;
3565         struct trace_seq *s = &iter->seq;
3566         unsigned char newline = '\n';
3567         struct trace_entry *entry;
3568         struct trace_event *event;
3569
3570         entry = iter->ent;
3571
3572         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3573                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3574                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3575                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3576                 if (trace_seq_has_overflowed(s))
3577                         return TRACE_TYPE_PARTIAL_LINE;
3578         }
3579
3580         event = ftrace_find_event(entry->type);
3581         if (event) {
3582                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3583                 if (ret != TRACE_TYPE_HANDLED)
3584                         return ret;
3585         }
3586
3587         SEQ_PUT_FIELD(s, newline);
3588
3589         return trace_handle_return(s);
3590 }
3591
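/* Output for a single entry when the "bin" trace option is set. */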
3592 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3593 {
3594         struct trace_array *tr = iter->tr;
3595         struct trace_seq *s = &iter->seq;
3596         struct trace_entry *entry;
3597         struct trace_event *event;
3598
3599         entry = iter->ent;
3600
3601         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3602                 SEQ_PUT_FIELD(s, entry->pid);
3603                 SEQ_PUT_FIELD(s, iter->cpu);
3604                 SEQ_PUT_FIELD(s, iter->ts);
3605                 if (trace_seq_has_overflowed(s))
3606                         return TRACE_TYPE_PARTIAL_LINE;
3607         }
3608
3609         event = ftrace_find_event(entry->type);
3610         return event ? event->funcs->binary(iter, 0, event) :
3611                 TRACE_TYPE_HANDLED;
3612 }
3613
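/* Return 1 if there is nothing left to read in the selected buffer(s). */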
3614 int trace_empty(struct trace_iterator *iter)
3615 {
3616         struct ring_buffer_iter *buf_iter;
3617         int cpu;
3618
3619         /* If we are looking at one CPU buffer, only check that one */
3620         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3621                 cpu = iter->cpu_file;
3622                 buf_iter = trace_buffer_iter(iter, cpu);
3623                 if (buf_iter) {
3624                         if (!ring_buffer_iter_empty(buf_iter))
3625                                 return 0;
3626                 } else {
3627                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3628                                 return 0;
3629                 }
3630                 return 1;
3631         }
3632
3633         for_each_tracing_cpu(cpu) {
3634                 buf_iter = trace_buffer_iter(iter, cpu);
3635                 if (buf_iter) {
3636                         if (!ring_buffer_iter_empty(buf_iter))
3637                                 return 0;
3638                 } else {
3639                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3640                                 return 0;
3641                 }
3642         }
3643
3644         return 1;
3645 }
3646
3647 /*  Called with trace_event_read_lock() held. */
3648 enum print_line_t print_trace_line(struct trace_iterator *iter)
3649 {
3650         struct trace_array *tr = iter->tr;
3651         unsigned long trace_flags = tr->trace_flags;
3652         enum print_line_t ret;
3653
3654         if (iter->lost_events) {
3655                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3656                                  iter->cpu, iter->lost_events);
3657                 if (trace_seq_has_overflowed(&iter->seq))
3658                         return TRACE_TYPE_PARTIAL_LINE;
3659         }
3660
3661         if (iter->trace && iter->trace->print_line) {
3662                 ret = iter->trace->print_line(iter);
3663                 if (ret != TRACE_TYPE_UNHANDLED)
3664                         return ret;
3665         }
3666
3667         if (iter->ent->type == TRACE_BPUTS &&
3668                         trace_flags & TRACE_ITER_PRINTK &&
3669                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3670                 return trace_print_bputs_msg_only(iter);
3671
3672         if (iter->ent->type == TRACE_BPRINT &&
3673                         trace_flags & TRACE_ITER_PRINTK &&
3674                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3675                 return trace_print_bprintk_msg_only(iter);
3676
3677         if (iter->ent->type == TRACE_PRINT &&
3678                         trace_flags & TRACE_ITER_PRINTK &&
3679                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3680                 return trace_print_printk_msg_only(iter);
3681
3682         if (trace_flags & TRACE_ITER_BIN)
3683                 return print_bin_fmt(iter);
3684
3685         if (trace_flags & TRACE_ITER_HEX)
3686                 return print_hex_fmt(iter);
3687
3688         if (trace_flags & TRACE_ITER_RAW)
3689                 return print_raw_fmt(iter);
3690
3691         return print_trace_fmt(iter);
3692 }
3693
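/* Print the header used when the latency format is requested. */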
3694 void trace_latency_header(struct seq_file *m)
3695 {
3696         struct trace_iterator *iter = m->private;
3697         struct trace_array *tr = iter->tr;
3698
3699         /* print nothing if the buffers are empty */
3700         if (trace_empty(iter))
3701                 return;
3702
3703         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3704                 print_trace_header(m, iter);
3705
3706         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3707                 print_lat_help_header(m);
3708 }
3709
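/*
 * Print the default header for the "trace" file, choosing the layout
 * based on the latency, verbose, irq-info and record-tgid options.
 */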
3710 void trace_default_header(struct seq_file *m)
3711 {
3712         struct trace_iterator *iter = m->private;
3713         struct trace_array *tr = iter->tr;
3714         unsigned long trace_flags = tr->trace_flags;
3715
3716         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3717                 return;
3718
3719         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3720                 /* print nothing if the buffers are empty */
3721                 if (trace_empty(iter))
3722                         return;
3723                 print_trace_header(m, iter);
3724                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3725                         print_lat_help_header(m);
3726         } else {
3727                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3728                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3729                                 print_func_help_header_irq(iter->trace_buffer,
3730                                                            m, trace_flags);
3731                         else
3732                                 print_func_help_header(iter->trace_buffer, m,
3733                                                        trace_flags);
3734                 }
3735         }
3736 }
3737
3738 static void test_ftrace_alive(struct seq_file *m)
3739 {
3740         if (!ftrace_is_dead())
3741                 return;
3742         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3743                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3744 }
3745
3746 #ifdef CONFIG_TRACER_MAX_TRACE
3747 static void show_snapshot_main_help(struct seq_file *m)
3748 {
3749         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3750                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3751                     "#                      Takes a snapshot of the main buffer.\n"
3752                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3753                     "#                      (Doesn't have to be '2', works with any number that\n"
3754                     "#                       is not a '0' or '1')\n");
3755 }
3756
3757 static void show_snapshot_percpu_help(struct seq_file *m)
3758 {
3759         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3760 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3761         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3762                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3763 #else
3764         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3765                     "#                     Must use main snapshot file to allocate.\n");
3766 #endif
3767         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3768                     "#                      (Doesn't have to be '2', works with any number that\n"
3769                     "#                       is not a '0' or '1')\n");
3770 }
3771
3772 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3773 {
3774         if (iter->tr->allocated_snapshot)
3775                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3776         else
3777                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3778
3779         seq_puts(m, "# Snapshot commands:\n");
3780         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3781                 show_snapshot_main_help(m);
3782         else
3783                 show_snapshot_percpu_help(m);
3784 }
3785 #else
3786 /* Should never be called */
3787 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3788 #endif
3789
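/*
 * seq_file ->show() callback: print the headers when there is no
 * current entry, otherwise format one trace line.  If the seq_file
 * buffer overflows, remember it in iter->leftover so the same line is
 * shown again on the next call.
 */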
3790 static int s_show(struct seq_file *m, void *v)
3791 {
3792         struct trace_iterator *iter = v;
3793         int ret;
3794
3795         if (iter->ent == NULL) {
3796                 if (iter->tr) {
3797                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3798                         seq_puts(m, "#\n");
3799                         test_ftrace_alive(m);
3800                 }
3801                 if (iter->snapshot && trace_empty(iter))
3802                         print_snapshot_help(m, iter);
3803                 else if (iter->trace && iter->trace->print_header)
3804                         iter->trace->print_header(m);
3805                 else
3806                         trace_default_header(m);
3807
3808         } else if (iter->leftover) {
3809                 /*
3810                  * If we filled the seq_file buffer earlier, we
3811                  * want to just show it now.
3812                  */
3813                 ret = trace_print_seq(m, &iter->seq);
3814
3815                 /* ret should this time be zero, but you never know */
3816                 iter->leftover = ret;
3817
3818         } else {
3819                 print_trace_line(iter);
3820                 ret = trace_print_seq(m, &iter->seq);
3821                 /*
3822                  * If we overflow the seq_file buffer, then it will
3823                  * ask us for this data again at start up.
3824                  * Use that instead.
3825                  *  ret is 0 if seq_file write succeeded.
3826                  *        -1 otherwise.
3827                  */
3828                 iter->leftover = ret;
3829         }
3830
3831         return 0;
3832 }
3833
3834 /*
3835  * Should be used after trace_array_get(); trace_types_lock
3836  * ensures that i_cdev was already initialized.
3837  */
3838 static inline int tracing_get_cpu(struct inode *inode)
3839 {
3840         if (inode->i_cdev) /* See trace_create_cpu_file() */
3841                 return (long)inode->i_cdev - 1;
3842         return RING_BUFFER_ALL_CPUS;
3843 }
3844
3845 static const struct seq_operations tracer_seq_ops = {
3846         .start          = s_start,
3847         .next           = s_next,
3848         .stop           = s_stop,
3849         .show           = s_show,
3850 };
3851
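/*
 * Set up the iterator used to read the "trace" file: take a private
 * copy of the current tracer, create a ring buffer iterator for each
 * requested CPU and, unless the snapshot buffer is being read, stop
 * tracing while the buffer is dumped.
 */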
3852 static struct trace_iterator *
3853 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3854 {
3855         struct trace_array *tr = inode->i_private;
3856         struct trace_iterator *iter;
3857         int cpu;
3858
3859         if (tracing_disabled)
3860                 return ERR_PTR(-ENODEV);
3861
3862         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3863         if (!iter)
3864                 return ERR_PTR(-ENOMEM);
3865
3866         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3867                                     GFP_KERNEL);
3868         if (!iter->buffer_iter)
3869                 goto release;
3870
3871         /*
3872          * We make a copy of the current tracer to avoid concurrent
3873          * changes on it while we are reading.
3874          */
3875         mutex_lock(&trace_types_lock);
3876         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3877         if (!iter->trace)
3878                 goto fail;
3879
3880         *iter->trace = *tr->current_trace;
3881
3882         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3883                 goto fail;
3884
3885         iter->tr = tr;
3886
3887 #ifdef CONFIG_TRACER_MAX_TRACE
3888         /* Currently only the top directory has a snapshot */
3889         if (tr->current_trace->print_max || snapshot)
3890                 iter->trace_buffer = &tr->max_buffer;
3891         else
3892 #endif
3893                 iter->trace_buffer = &tr->trace_buffer;
3894         iter->snapshot = snapshot;
3895         iter->pos = -1;
3896         iter->cpu_file = tracing_get_cpu(inode);
3897         mutex_init(&iter->mutex);
3898
3899         /* Notify the tracer early; before we stop tracing. */
3900         if (iter->trace && iter->trace->open)
3901                 iter->trace->open(iter);
3902
3903         /* Annotate start of buffers if we had overruns */
3904         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3905                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3906
3907         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3908         if (trace_clocks[tr->clock_id].in_ns)
3909                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3910
3911         /* stop the trace while dumping if we are not opening "snapshot" */
3912         if (!iter->snapshot)
3913                 tracing_stop_tr(tr);
3914
3915         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3916                 for_each_tracing_cpu(cpu) {
3917                         iter->buffer_iter[cpu] =
3918                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3919                                                          cpu, GFP_KERNEL);
3920                 }
3921                 ring_buffer_read_prepare_sync();
3922                 for_each_tracing_cpu(cpu) {
3923                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3924                         tracing_iter_reset(iter, cpu);
3925                 }
3926         } else {
3927                 cpu = iter->cpu_file;
3928                 iter->buffer_iter[cpu] =
3929                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3930                                                  cpu, GFP_KERNEL);
3931                 ring_buffer_read_prepare_sync();
3932                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3933                 tracing_iter_reset(iter, cpu);
3934         }
3935
3936         mutex_unlock(&trace_types_lock);
3937
3938         return iter;
3939
3940  fail:
3941         mutex_unlock(&trace_types_lock);
3942         kfree(iter->trace);
3943         kfree(iter->buffer_iter);
3944 release:
3945         seq_release_private(inode, file);
3946         return ERR_PTR(-ENOMEM);
3947 }
3948
3949 int tracing_open_generic(struct inode *inode, struct file *filp)
3950 {
3951         if (tracing_disabled)
3952                 return -ENODEV;
3953
3954         filp->private_data = inode->i_private;
3955         return 0;
3956 }
3957
3958 bool tracing_is_disabled(void)
3959 {
3960         return (tracing_disabled) ? true : false;
3961 }
3962
3963 /*
3964  * Open and update trace_array ref count.
3965  * Must have the current trace_array passed to it.
3966  */
3967 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3968 {
3969         struct trace_array *tr = inode->i_private;
3970
3971         if (tracing_disabled)
3972                 return -ENODEV;
3973
3974         if (trace_array_get(tr) < 0)
3975                 return -ENODEV;
3976
3977         filp->private_data = inode->i_private;
3978
3979         return 0;
3980 }
3981
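/*
 * Tear down the iterator created by __tracing_open() and restart
 * tracing if it was stopped when the file was opened.
 */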
3982 static int tracing_release(struct inode *inode, struct file *file)
3983 {
3984         struct trace_array *tr = inode->i_private;
3985         struct seq_file *m = file->private_data;
3986         struct trace_iterator *iter;
3987         int cpu;
3988
3989         if (!(file->f_mode & FMODE_READ)) {
3990                 trace_array_put(tr);
3991                 return 0;
3992         }
3993
3994         /* Writes do not use seq_file */
3995         iter = m->private;
3996         mutex_lock(&trace_types_lock);
3997
3998         for_each_tracing_cpu(cpu) {
3999                 if (iter->buffer_iter[cpu])
4000                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4001         }
4002
4003         if (iter->trace && iter->trace->close)
4004                 iter->trace->close(iter);
4005
4006         if (!iter->snapshot)
4007                 /* reenable tracing if it was previously enabled */
4008                 tracing_start_tr(tr);
4009
4010         __trace_array_put(tr);
4011
4012         mutex_unlock(&trace_types_lock);
4013
4014         mutex_destroy(&iter->mutex);
4015         free_cpumask_var(iter->started);
4016         kfree(iter->trace);
4017         kfree(iter->buffer_iter);
4018         seq_release_private(inode, file);
4019
4020         return 0;
4021 }
4022
4023 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4024 {
4025         struct trace_array *tr = inode->i_private;
4026
4027         trace_array_put(tr);
4028         return 0;
4029 }
4030
4031 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4032 {
4033         struct trace_array *tr = inode->i_private;
4034
4035         trace_array_put(tr);
4036
4037         return single_release(inode, file);
4038 }
4039
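/*
 * Open the "trace" file.  Opening for write with O_TRUNC clears the
 * buffer; opening for read builds a full iterator via __tracing_open().
 */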
4040 static int tracing_open(struct inode *inode, struct file *file)
4041 {
4042         struct trace_array *tr = inode->i_private;
4043         struct trace_iterator *iter;
4044         int ret = 0;
4045
4046         if (trace_array_get(tr) < 0)
4047                 return -ENODEV;
4048
4049         /* If this file was open for write, then erase contents */
4050         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4051                 int cpu = tracing_get_cpu(inode);
4052                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4053
4054 #ifdef CONFIG_TRACER_MAX_TRACE
4055                 if (tr->current_trace->print_max)
4056                         trace_buf = &tr->max_buffer;
4057 #endif
4058
4059                 if (cpu == RING_BUFFER_ALL_CPUS)
4060                         tracing_reset_online_cpus(trace_buf);
4061                 else
4062                         tracing_reset(trace_buf, cpu);
4063         }
4064
4065         if (file->f_mode & FMODE_READ) {
4066                 iter = __tracing_open(inode, file, false);
4067                 if (IS_ERR(iter))
4068                         ret = PTR_ERR(iter);
4069                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4070                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4071         }
4072
4073         if (ret < 0)
4074                 trace_array_put(tr);
4075
4076         return ret;
4077 }
4078
4079 /*
4080  * Some tracers are not suitable for instance buffers.
4081  * A tracer is always available for the global array (toplevel)
4082  * or if it explicitly states that it is.
4083  */
4084 static bool
4085 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4086 {
4087         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4088 }
4089
4090 /* Find the next tracer that this trace array may use */
4091 static struct tracer *
4092 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4093 {
4094         while (t && !trace_ok_for_array(t, tr))
4095                 t = t->next;
4096
4097         return t;
4098 }
4099
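/*
 * seq_file callbacks used to list the tracers available to this trace
 * array (the "available_tracers" file).
 */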
4100 static void *
4101 t_next(struct seq_file *m, void *v, loff_t *pos)
4102 {
4103         struct trace_array *tr = m->private;
4104         struct tracer *t = v;
4105
4106         (*pos)++;
4107
4108         if (t)
4109                 t = get_tracer_for_array(tr, t->next);
4110
4111         return t;
4112 }
4113
4114 static void *t_start(struct seq_file *m, loff_t *pos)
4115 {
4116         struct trace_array *tr = m->private;
4117         struct tracer *t;
4118         loff_t l = 0;
4119
4120         mutex_lock(&trace_types_lock);
4121
4122         t = get_tracer_for_array(tr, trace_types);
4123         for (; t && l < *pos; t = t_next(m, t, &l))
4124                         ;
4125
4126         return t;
4127 }
4128
4129 static void t_stop(struct seq_file *m, void *p)
4130 {
4131         mutex_unlock(&trace_types_lock);
4132 }
4133
4134 static int t_show(struct seq_file *m, void *v)
4135 {
4136         struct tracer *t = v;
4137
4138         if (!t)
4139                 return 0;
4140
4141         seq_puts(m, t->name);
4142         if (t->next)
4143                 seq_putc(m, ' ');
4144         else
4145                 seq_putc(m, '\n');
4146
4147         return 0;
4148 }
4149
4150 static const struct seq_operations show_traces_seq_ops = {
4151         .start          = t_start,
4152         .next           = t_next,
4153         .stop           = t_stop,
4154         .show           = t_show,
4155 };
4156
4157 static int show_traces_open(struct inode *inode, struct file *file)
4158 {
4159         struct trace_array *tr = inode->i_private;
4160         struct seq_file *m;
4161         int ret;
4162
4163         if (tracing_disabled)
4164                 return -ENODEV;
4165
4166         if (trace_array_get(tr) < 0)
4167                 return -ENODEV;
4168
4169         ret = seq_open(file, &show_traces_seq_ops);
4170         if (ret) {
4171                 trace_array_put(tr);
4172                 return ret;
4173         }
4174
4175         m = file->private_data;
4176         m->private = tr;
4177
4178         return 0;
4179 }
4180
4181 static int show_traces_release(struct inode *inode, struct file *file)
4182 {
4183         struct trace_array *tr = inode->i_private;
4184
4185         trace_array_put(tr);
4186         return seq_release(inode, file);
4187 }
4188
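/*
 * Writes to the "trace" file are accepted but discarded; opening the
 * file for write with O_TRUNC is what actually clears the buffer (see
 * tracing_open()).
 */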
4189 static ssize_t
4190 tracing_write_stub(struct file *filp, const char __user *ubuf,
4191                    size_t count, loff_t *ppos)
4192 {
4193         return count;
4194 }
4195
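/*
 * lseek for trace files: only files opened for read use seq_file and
 * can seek; writers always stay at offset zero.
 */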
4196 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4197 {
4198         int ret;
4199
4200         if (file->f_mode & FMODE_READ)
4201                 ret = seq_lseek(file, offset, whence);
4202         else
4203                 file->f_pos = ret = 0;
4204
4205         return ret;
4206 }
4207
4208 static const struct file_operations tracing_fops = {
4209         .open           = tracing_open,
4210         .read           = seq_read,
4211         .write          = tracing_write_stub,
4212         .llseek         = tracing_lseek,
4213         .release        = tracing_release,
4214 };
4215
4216 static const struct file_operations show_traces_fops = {
4217         .open           = show_traces_open,
4218         .read           = seq_read,
4219         .llseek         = seq_lseek,
4220         .release        = show_traces_release,
4221 };
4222
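/* Show which CPUs are being traced, as a hex mask (tracing_cpumask). */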
4223 static ssize_t
4224 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4225                      size_t count, loff_t *ppos)
4226 {
4227         struct trace_array *tr = file_inode(filp)->i_private;
4228         char *mask_str;
4229         int len;
4230
4231         len = snprintf(NULL, 0, "%*pb\n",
4232                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4233         mask_str = kmalloc(len, GFP_KERNEL);
4234         if (!mask_str)
4235                 return -ENOMEM;
4236
4237         len = snprintf(mask_str, len, "%*pb\n",
4238                        cpumask_pr_args(tr->tracing_cpumask));
4239         if (len >= count) {
4240                 count = -EINVAL;
4241                 goto out_err;
4242         }
4243         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4244
4245 out_err:
4246         kfree(mask_str);
4247
4248         return count;
4249 }
4250
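/*
 * Update tracing_cpumask from a user supplied hex mask, for example
 * "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1.  Recording
 * is disabled on CPUs leaving the mask and re-enabled on CPUs entering
 * it.
 */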
4251 static ssize_t
4252 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4253                       size_t count, loff_t *ppos)
4254 {
4255         struct trace_array *tr = file_inode(filp)->i_private;
4256         cpumask_var_t tracing_cpumask_new;
4257         int err, cpu;
4258
4259         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4260                 return -ENOMEM;
4261
4262         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4263         if (err)
4264                 goto err_unlock;
4265
4266         local_irq_disable();
4267         arch_spin_lock(&tr->max_lock);
4268         for_each_tracing_cpu(cpu) {
4269                 /*
4270                  * Increase/decrease the disabled counter if we are
4271                  * about to flip a bit in the cpumask:
4272                  */
4273                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4274                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4275                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4276                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4277                 }
4278                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4279                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4280                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4281                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4282                 }
4283         }
4284         arch_spin_unlock(&tr->max_lock);
4285         local_irq_enable();
4286
4287         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4288         free_cpumask_var(tracing_cpumask_new);
4289
4290         return count;
4291
4292 err_unlock:
4293         free_cpumask_var(tracing_cpumask_new);
4294
4295         return err;
4296 }
4297
4298 static const struct file_operations tracing_cpumask_fops = {
4299         .open           = tracing_open_generic_tr,
4300         .read           = tracing_cpumask_read,
4301         .write          = tracing_cpumask_write,
4302         .release        = tracing_release_generic_tr,
4303         .llseek         = generic_file_llseek,
4304 };
4305
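/*
 * Show every trace option, prefixed with "no" when it is cleared,
 * followed by the current tracer's own options.
 */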
4306 static int tracing_trace_options_show(struct seq_file *m, void *v)
4307 {
4308         struct tracer_opt *trace_opts;
4309         struct trace_array *tr = m->private;
4310         u32 tracer_flags;
4311         int i;
4312
4313         mutex_lock(&trace_types_lock);
4314         tracer_flags = tr->current_trace->flags->val;
4315         trace_opts = tr->current_trace->flags->opts;
4316
4317         for (i = 0; trace_options[i]; i++) {
4318                 if (tr->trace_flags & (1 << i))
4319                         seq_printf(m, "%s\n", trace_options[i]);
4320                 else
4321                         seq_printf(m, "no%s\n", trace_options[i]);
4322         }
4323
4324         for (i = 0; trace_opts[i].name; i++) {
4325                 if (tracer_flags & trace_opts[i].bit)
4326                         seq_printf(m, "%s\n", trace_opts[i].name);
4327                 else
4328                         seq_printf(m, "no%s\n", trace_opts[i].name);
4329         }
4330         mutex_unlock(&trace_types_lock);
4331
4332         return 0;
4333 }
4334
4335 static int __set_tracer_option(struct trace_array *tr,
4336                                struct tracer_flags *tracer_flags,
4337                                struct tracer_opt *opts, int neg)
4338 {
4339         struct tracer *trace = tracer_flags->trace;
4340         int ret;
4341
4342         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4343         if (ret)
4344                 return ret;
4345
4346         if (neg)
4347                 tracer_flags->val &= ~opts->bit;
4348         else
4349                 tracer_flags->val |= opts->bit;
4350         return 0;
4351 }
4352
4353 /* Try to assign a tracer specific option */
4354 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4355 {
4356         struct tracer *trace = tr->current_trace;
4357         struct tracer_flags *tracer_flags = trace->flags;
4358         struct tracer_opt *opts = NULL;
4359         int i;
4360
4361         for (i = 0; tracer_flags->opts[i].name; i++) {
4362                 opts = &tracer_flags->opts[i];
4363
4364                 if (strcmp(cmp, opts->name) == 0)
4365                         return __set_tracer_option(tr, trace->flags, opts, neg);
4366         }
4367
4368         return -EINVAL;
4369 }
4370
4371 /* Some tracers require overwrite to stay enabled */
4372 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4373 {
4374         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4375                 return -1;
4376
4377         return 0;
4378 }
4379
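/*
 * Set or clear a single trace flag.  The current tracer may veto the
 * change, and several flags have side effects (cmdline/tgid recording,
 * event and function fork following, buffer overwrite mode and
 * trace_printk) that are propagated here.
 */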
4380 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4381 {
4382         int *map;
4383
4384         if ((mask == TRACE_ITER_RECORD_TGID) ||
4385             (mask == TRACE_ITER_RECORD_CMD))
4386                 lockdep_assert_held(&event_mutex);
4387
4388         /* do nothing if flag is already set */
4389         if (!!(tr->trace_flags & mask) == !!enabled)
4390                 return 0;
4391
4392         /* Give the tracer a chance to approve the change */
4393         if (tr->current_trace->flag_changed)
4394                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4395                         return -EINVAL;
4396
4397         if (enabled)
4398                 tr->trace_flags |= mask;
4399         else
4400                 tr->trace_flags &= ~mask;
4401
4402         if (mask == TRACE_ITER_RECORD_CMD)
4403                 trace_event_enable_cmd_record(enabled);
4404
4405         if (mask == TRACE_ITER_RECORD_TGID) {
4406                 if (!tgid_map) {
4407                         tgid_map_max = pid_max;
4408                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4409                                        GFP_KERNEL);
4410
4411                         /*
4412                          * Pairs with smp_load_acquire() in
4413                          * trace_find_tgid_ptr() to ensure that if it observes
4414                          * the tgid_map we just allocated then it also observes
4415                          * the corresponding tgid_map_max value.
4416                          */
4417                         smp_store_release(&tgid_map, map);
4418                 }
4419                 if (!tgid_map) {
4420                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4421                         return -ENOMEM;
4422                 }
4423
4424                 trace_event_enable_tgid_record(enabled);
4425         }
4426
4427         if (mask == TRACE_ITER_EVENT_FORK)
4428                 trace_event_follow_fork(tr, enabled);
4429
4430         if (mask == TRACE_ITER_FUNC_FORK)
4431                 ftrace_pid_follow_fork(tr, enabled);
4432
4433         if (mask == TRACE_ITER_OVERWRITE) {
4434                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4435 #ifdef CONFIG_TRACER_MAX_TRACE
4436                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4437 #endif
4438         }
4439
4440         if (mask == TRACE_ITER_PRINTK) {
4441                 trace_printk_start_stop_comm(enabled);
4442                 trace_printk_control(enabled);
4443         }
4444
4445         return 0;
4446 }
4447
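/*
 * Parse one option name, with an optional "no" prefix to clear it, and
 * apply it either as a global trace flag or as a tracer specific
 * option.
 */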
4448 static int trace_set_options(struct trace_array *tr, char *option)
4449 {
4450         char *cmp;
4451         int neg = 0;
4452         int ret;
4453         size_t orig_len = strlen(option);
4454
4455         cmp = strstrip(option);
4456
4457         if (strncmp(cmp, "no", 2) == 0) {
4458                 neg = 1;
4459                 cmp += 2;
4460         }
4461
4462         mutex_lock(&event_mutex);
4463         mutex_lock(&trace_types_lock);
4464
4465         ret = match_string(trace_options, -1, cmp);
4466         /* If no option could be set, test the specific tracer options */
4467         if (ret < 0)
4468                 ret = set_tracer_option(tr, cmp, neg);
4469         else
4470                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4471
4472         mutex_unlock(&trace_types_lock);
4473         mutex_unlock(&event_mutex);
4474
4475         /*
4476          * If the first trailing whitespace is replaced with '\0' by strstrip,
4477          * turn it back into a space.
4478          */
4479         if (orig_len > strlen(option))
4480                 option[strlen(option)] = ' ';
4481
4482         return ret;
4483 }
4484
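/*
 * Apply the comma separated trace options that were passed on the
 * kernel command line.
 */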
4485 static void __init apply_trace_boot_options(void)
4486 {
4487         char *buf = trace_boot_options_buf;
4488         char *option;
4489
4490         while (true) {
4491                 option = strsep(&buf, ",");
4492
4493                 if (!option)
4494                         break;
4495
4496                 if (*option)
4497                         trace_set_options(&global_trace, option);
4498
4499                 /* Put back the comma to allow this to be called again */
4500                 if (buf)
4501                         *(buf - 1) = ',';
4502         }
4503 }
4504
4505 static ssize_t
4506 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4507                         size_t cnt, loff_t *ppos)
4508 {
4509         struct seq_file *m = filp->private_data;
4510         struct trace_array *tr = m->private;
4511         char buf[64];
4512         int ret;
4513
4514         if (cnt >= sizeof(buf))
4515                 return -EINVAL;
4516
4517         if (copy_from_user(buf, ubuf, cnt))
4518                 return -EFAULT;
4519
4520         buf[cnt] = 0;
4521
4522         ret = trace_set_options(tr, buf);
4523         if (ret < 0)
4524                 return ret;
4525
4526         *ppos += cnt;
4527
4528         return cnt;
4529 }
4530
4531 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4532 {
4533         struct trace_array *tr = inode->i_private;
4534         int ret;
4535
4536         if (tracing_disabled)
4537                 return -ENODEV;
4538
4539         if (trace_array_get(tr) < 0)
4540                 return -ENODEV;
4541
4542         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4543         if (ret < 0)
4544                 trace_array_put(tr);
4545
4546         return ret;
4547 }
4548
4549 static const struct file_operations tracing_iter_fops = {
4550         .open           = tracing_trace_options_open,
4551         .read           = seq_read,
4552         .llseek         = seq_lseek,
4553         .release        = tracing_single_release_tr,
4554         .write          = tracing_trace_options_write,
4555 };
4556
4557 static const char readme_msg[] =
4558         "tracing mini-HOWTO:\n\n"
4559         "# echo 0 > tracing_on : quick way to disable tracing\n"
4560         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4561         " Important files:\n"
4562         "  trace\t\t\t- The static contents of the buffer\n"
4563         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4564         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4565         "  current_tracer\t- function and latency tracers\n"
4566         "  available_tracers\t- list of configured tracers for current_tracer\n"
4567         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4568         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4569         "  trace_clock\t\t- change the clock used to order events\n"
4570         "       local:   Per cpu clock but may not be synced across CPUs\n"
4571         "      global:   Synced across CPUs but slows tracing down.\n"
4572         "     counter:   Not a clock, but just an increment\n"
4573         "      uptime:   Jiffy counter from time of boot\n"
4574         "        perf:   Same clock that perf events use\n"
4575 #ifdef CONFIG_X86_64
4576         "     x86-tsc:   TSC cycle counter\n"
4577 #endif
4578         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4579         "       delta:   Delta difference against a buffer-wide timestamp\n"
4580         "    absolute:   Absolute (standalone) timestamp\n"
4581         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4582         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4583         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4584         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4585         "\t\t\t  Remove sub-buffer with rmdir\n"
4586         "  trace_options\t\t- Set format or modify how tracing happens\n"
4587         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4588         "\t\t\t  option name\n"
4589         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4590 #ifdef CONFIG_DYNAMIC_FTRACE
4591         "\n  available_filter_functions - list of functions that can be filtered on\n"
4592         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4593         "\t\t\t  functions\n"
4594         "\t     accepts: func_full_name or glob-matching-pattern\n"
4595         "\t     modules: Can select a group via module\n"
4596         "\t      Format: :mod:<module-name>\n"
4597         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4598         "\t    triggers: a command to perform when function is hit\n"
4599         "\t      Format: <function>:<trigger>[:count]\n"
4600         "\t     trigger: traceon, traceoff\n"
4601         "\t\t      enable_event:<system>:<event>\n"
4602         "\t\t      disable_event:<system>:<event>\n"
4603 #ifdef CONFIG_STACKTRACE
4604         "\t\t      stacktrace\n"
4605 #endif
4606 #ifdef CONFIG_TRACER_SNAPSHOT
4607         "\t\t      snapshot\n"
4608 #endif
4609         "\t\t      dump\n"
4610         "\t\t      cpudump\n"
4611         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4612         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4613         "\t     The first one will disable tracing every time do_fault is hit\n"
4614         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4615         "\t       The first time do_trap is hit and it disables tracing, the\n"
4616         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4617         "\t       the counter will not decrement. It only decrements when the\n"
4618         "\t       trigger did work\n"
4619         "\t     To remove trigger without count:\n"
4620         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4621         "\t     To remove trigger with a count:\n"
4622         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4623         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4624         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4625         "\t    modules: Can select a group via module command :mod:\n"
4626         "\t    Does not accept triggers\n"
4627 #endif /* CONFIG_DYNAMIC_FTRACE */
4628 #ifdef CONFIG_FUNCTION_TRACER
4629         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4630         "\t\t    (function)\n"
4631 #endif
4632 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4633         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4634         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4635         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4636 #endif
4637 #ifdef CONFIG_TRACER_SNAPSHOT
4638         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4639         "\t\t\t  snapshot buffer. Read the contents for more\n"
4640         "\t\t\t  information\n"
4641 #endif
4642 #ifdef CONFIG_STACK_TRACER
4643         "  stack_trace\t\t- Shows the max stack trace when active\n"
4644         "  stack_max_size\t- Shows current max stack size that was traced\n"
4645         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4646         "\t\t\t  new trace)\n"
4647 #ifdef CONFIG_DYNAMIC_FTRACE
4648         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4649         "\t\t\t  traces\n"
4650 #endif
4651 #endif /* CONFIG_STACK_TRACER */
4652 #ifdef CONFIG_KPROBE_EVENTS
4653         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4654         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4655 #endif
4656 #ifdef CONFIG_UPROBE_EVENTS
4657         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4658         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4659 #endif
4660 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4661         "\t  accepts: event-definitions (one definition per line)\n"
4662         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4663         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4664         "\t           -:[<group>/]<event>\n"
4665 #ifdef CONFIG_KPROBE_EVENTS
4666         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4667   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4668 #endif
4669 #ifdef CONFIG_UPROBE_EVENTS
4670         "\t    place: <path>:<offset>\n"
4671 #endif
4672         "\t     args: <name>=fetcharg[:type]\n"
4673         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4674         "\t           $stack<index>, $stack, $retval, $comm\n"
4675         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4676         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4677 #endif
4678         "  events/\t\t- Directory containing all trace event subsystems:\n"
4679         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4680         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4681         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4682         "\t\t\t  events\n"
4683         "      filter\t\t- If set, only events passing filter are traced\n"
4684         "  events/<system>/<event>/\t- Directory containing control files for\n"
4685         "\t\t\t  <event>:\n"
4686         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4687         "      filter\t\t- If set, only events passing filter are traced\n"
4688         "      trigger\t\t- If set, a command to perform when event is hit\n"
4689         "\t    Format: <trigger>[:count][if <filter>]\n"
4690         "\t   trigger: traceon, traceoff\n"
4691         "\t            enable_event:<system>:<event>\n"
4692         "\t            disable_event:<system>:<event>\n"
4693 #ifdef CONFIG_HIST_TRIGGERS
4694         "\t            enable_hist:<system>:<event>\n"
4695         "\t            disable_hist:<system>:<event>\n"
4696 #endif
4697 #ifdef CONFIG_STACKTRACE
4698         "\t\t    stacktrace\n"
4699 #endif
4700 #ifdef CONFIG_TRACER_SNAPSHOT
4701         "\t\t    snapshot\n"
4702 #endif
4703 #ifdef CONFIG_HIST_TRIGGERS
4704         "\t\t    hist (see below)\n"
4705 #endif
4706         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4707         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4708         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4709         "\t                  events/block/block_unplug/trigger\n"
4710         "\t   The first disables tracing every time block_unplug is hit.\n"
4711         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4712         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4713         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4714         "\t   Like function triggers, the counter is only decremented if it\n"
4715         "\t    enabled or disabled tracing.\n"
4716         "\t   To remove a trigger without a count:\n"
4717         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4718         "\t   To remove a trigger with a count:\n"
4719         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4720         "\t   Filters can be ignored when removing a trigger.\n"
4721 #ifdef CONFIG_HIST_TRIGGERS
4722         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4723         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4724         "\t            [:values=<field1[,field2,...]>]\n"
4725         "\t            [:sort=<field1[,field2,...]>]\n"
4726         "\t            [:size=#entries]\n"
4727         "\t            [:pause][:continue][:clear]\n"
4728         "\t            [:name=histname1]\n"
4729         "\t            [if <filter>]\n\n"
4730         "\t    Note, special fields can be used as well:\n"
4731         "\t            common_timestamp - to record current timestamp\n"
4732         "\t            common_cpu - to record the CPU the event happened on\n"
4733         "\n"
4734         "\t    When a matching event is hit, an entry is added to a hash\n"
4735         "\t    table using the key(s) and value(s) named, and the value of a\n"
4736         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4737         "\t    correspond to fields in the event's format description.  Keys\n"
4738         "\t    can be any field, or the special string 'stacktrace'.\n"
4739         "\t    Compound keys consisting of up to two fields can be specified\n"
4740         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4741         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4742         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4743         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4744         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4745         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4746         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4747         "\t    its histogram data will be shared with other triggers of the\n"
4748         "\t    same name, and trigger hits will update this common data.\n\n"
4749         "\t    Reading the 'hist' file for the event will dump the hash\n"
4750         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4751         "\t    triggers attached to an event, there will be a table for each\n"
4752         "\t    trigger in the output.  The table displayed for a named\n"
4753         "\t    trigger will be the same as any other instance having the\n"
4754         "\t    same name.  The default format used to display a given field\n"
4755         "\t    can be modified by appending any of the following modifiers\n"
4756         "\t    to the field name, as applicable:\n\n"
4757         "\t            .hex        display a number as a hex value\n"
4758         "\t            .sym        display an address as a symbol\n"
4759         "\t            .sym-offset display an address as a symbol and offset\n"
4760         "\t            .execname   display a common_pid as a program name\n"
4761         "\t            .syscall    display a syscall id as a syscall name\n"
4762         "\t            .log2       display log2 value rather than raw number\n"
4763         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4764         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4765         "\t    trigger or to start a hist trigger but not log any events\n"
4766         "\t    until told to do so.  'continue' can be used to start or\n"
4767         "\t    restart a paused hist trigger.\n\n"
4768         "\t    The 'clear' parameter will clear the contents of a running\n"
4769         "\t    hist trigger and leave its current paused/active state\n"
4770         "\t    unchanged.\n\n"
4771         "\t    The enable_hist and disable_hist triggers can be used to\n"
4772         "\t    have one event conditionally start and stop another event's\n"
4773         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4774         "\t    the enable_event and disable_event triggers.\n"
4775 #endif
4776 ;
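/*
 * Illustrative hist trigger usage matching the help text above (a sketch
 * only; it assumes tracefs is mounted at /sys/kernel/tracing and that the
 * kmem:kmalloc event is available on the running kernel):
 *
 *   cd /sys/kernel/tracing
 *   echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist      # dump the aggregated hash table
 *   echo '!hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger      # remove the trigger again
 */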
4777
4778 static ssize_t
4779 tracing_readme_read(struct file *filp, char __user *ubuf,
4780                        size_t cnt, loff_t *ppos)
4781 {
4782         return simple_read_from_buffer(ubuf, cnt, ppos,
4783                                         readme_msg, strlen(readme_msg));
4784 }
4785
4786 static const struct file_operations tracing_readme_fops = {
4787         .open           = tracing_open_generic,
4788         .read           = tracing_readme_read,
4789         .llseek         = generic_file_llseek,
4790 };
4791
4792 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4793 {
4794         int pid = ++(*pos);
4795
4796         return trace_find_tgid_ptr(pid);
4797 }
4798
4799 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4800 {
4801         int pid = *pos;
4802
4803         return trace_find_tgid_ptr(pid);
4804 }
4805
4806 static void saved_tgids_stop(struct seq_file *m, void *v)
4807 {
4808 }
4809
4810 static int saved_tgids_show(struct seq_file *m, void *v)
4811 {
4812         int *entry = (int *)v;
4813         int pid = entry - tgid_map;
4814         int tgid = *entry;
4815
4816         if (tgid == 0)
4817                 return SEQ_SKIP;
4818
4819         seq_printf(m, "%d %d\n", pid, tgid);
4820         return 0;
4821 }
4822
4823 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4824         .start          = saved_tgids_start,
4825         .stop           = saved_tgids_stop,
4826         .next           = saved_tgids_next,
4827         .show           = saved_tgids_show,
4828 };
4829
4830 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4831 {
4832         if (tracing_disabled)
4833                 return -ENODEV;
4834
4835         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4836 }
4837
4838
4839 static const struct file_operations tracing_saved_tgids_fops = {
4840         .open           = tracing_saved_tgids_open,
4841         .read           = seq_read,
4842         .llseek         = seq_lseek,
4843         .release        = seq_release,
4844 };
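/*
 * Reading "saved_tgids" through the fops above yields one "<pid> <tgid>"
 * pair per line for each cached entry.  Illustrative output only (values
 * and the tracefs mount point are assumptions):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1231 1231
 *   1232 1231
 *
 * Entries are only recorded while the "record-tgid" trace option is set.
 */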
4845
4846 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4847 {
4848         unsigned int *ptr = v;
4849
4850         if (*pos || m->count)
4851                 ptr++;
4852
4853         (*pos)++;
4854
4855         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4856              ptr++) {
4857                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4858                         continue;
4859
4860                 return ptr;
4861         }
4862
4863         return NULL;
4864 }
4865
4866 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4867 {
4868         void *v;
4869         loff_t l = 0;
4870
4871         preempt_disable();
4872         arch_spin_lock(&trace_cmdline_lock);
4873
4874         v = &savedcmd->map_cmdline_to_pid[0];
4875         while (l <= *pos) {
4876                 v = saved_cmdlines_next(m, v, &l);
4877                 if (!v)
4878                         return NULL;
4879         }
4880
4881         return v;
4882 }
4883
4884 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4885 {
4886         arch_spin_unlock(&trace_cmdline_lock);
4887         preempt_enable();
4888 }
4889
4890 static int saved_cmdlines_show(struct seq_file *m, void *v)
4891 {
4892         char buf[TASK_COMM_LEN];
4893         unsigned int *pid = v;
4894
4895         __trace_find_cmdline(*pid, buf);
4896         seq_printf(m, "%d %s\n", *pid, buf);
4897         return 0;
4898 }
4899
4900 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4901         .start          = saved_cmdlines_start,
4902         .next           = saved_cmdlines_next,
4903         .stop           = saved_cmdlines_stop,
4904         .show           = saved_cmdlines_show,
4905 };
4906
4907 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4908 {
4909         if (tracing_disabled)
4910                 return -ENODEV;
4911
4912         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4913 }
4914
4915 static const struct file_operations tracing_saved_cmdlines_fops = {
4916         .open           = tracing_saved_cmdlines_open,
4917         .read           = seq_read,
4918         .llseek         = seq_lseek,
4919         .release        = seq_release,
4920 };
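/*
 * Reading "saved_cmdlines" through the fops above yields one
 * "<pid> <comm>" pair per line for each cached entry.  Illustrative
 * output only (values and mount point are assumptions):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1 systemd
 *   10 rcu_sched
 */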
4921
4922 static ssize_t
4923 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4924                                  size_t cnt, loff_t *ppos)
4925 {
4926         char buf[64];
4927         int r;
4928
4929         arch_spin_lock(&trace_cmdline_lock);
4930         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4931         arch_spin_unlock(&trace_cmdline_lock);
4932
4933         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4934 }
4935
4936 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4937 {
4938         kfree(s->saved_cmdlines);
4939         kfree(s->map_cmdline_to_pid);
4940         kfree(s);
4941 }
4942
4943 static int tracing_resize_saved_cmdlines(unsigned int val)
4944 {
4945         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4946
4947         s = kmalloc(sizeof(*s), GFP_KERNEL);
4948         if (!s)
4949                 return -ENOMEM;
4950
4951         if (allocate_cmdlines_buffer(val, s) < 0) {
4952                 kfree(s);
4953                 return -ENOMEM;
4954         }
4955
4956         arch_spin_lock(&trace_cmdline_lock);
4957         savedcmd_temp = savedcmd;
4958         savedcmd = s;
4959         arch_spin_unlock(&trace_cmdline_lock);
4960         free_saved_cmdlines_buffer(savedcmd_temp);
4961
4962         return 0;
4963 }
4964
4965 static ssize_t
4966 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4967                                   size_t cnt, loff_t *ppos)
4968 {
4969         unsigned long val;
4970         int ret;
4971
4972         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4973         if (ret)
4974                 return ret;
4975
4976         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4977         if (!val || val > PID_MAX_DEFAULT)
4978                 return -EINVAL;
4979
4980         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4981         if (ret < 0)
4982                 return ret;
4983
4984         *ppos += cnt;
4985
4986         return cnt;
4987 }
4988
4989 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4990         .open           = tracing_open_generic,
4991         .read           = tracing_saved_cmdlines_size_read,
4992         .write          = tracing_saved_cmdlines_size_write,
4993 };
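/*
 * Illustrative resize of the cmdline cache via the file backed by the
 * fops above (a sketch; assumes tracefs at /sys/kernel/tracing):
 *
 *   echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *   cat /sys/kernel/tracing/saved_cmdlines_size      # -> 4096
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL, and the
 * previous cache is freed after a successful resize.
 */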
4994
4995 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4996 static union trace_eval_map_item *
4997 update_eval_map(union trace_eval_map_item *ptr)
4998 {
4999         if (!ptr->map.eval_string) {
5000                 if (ptr->tail.next) {
5001                         ptr = ptr->tail.next;
5002                         /* Set ptr to the next real item (skip head) */
5003                         ptr++;
5004                 } else
5005                         return NULL;
5006         }
5007         return ptr;
5008 }
5009
5010 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5011 {
5012         union trace_eval_map_item *ptr = v;
5013
5014         /*
5015          * Paranoid! If ptr points to end, we don't want to increment past it.
5016          * This really should never happen.
5017          */
5018         ptr = update_eval_map(ptr);
5019         if (WARN_ON_ONCE(!ptr))
5020                 return NULL;
5021
5022         ptr++;
5023
5024         (*pos)++;
5025
5026         ptr = update_eval_map(ptr);
5027
5028         return ptr;
5029 }
5030
5031 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5032 {
5033         union trace_eval_map_item *v;
5034         loff_t l = 0;
5035
5036         mutex_lock(&trace_eval_mutex);
5037
5038         v = trace_eval_maps;
5039         if (v)
5040                 v++;
5041
5042         while (v && l < *pos) {
5043                 v = eval_map_next(m, v, &l);
5044         }
5045
5046         return v;
5047 }
5048
5049 static void eval_map_stop(struct seq_file *m, void *v)
5050 {
5051         mutex_unlock(&trace_eval_mutex);
5052 }
5053
5054 static int eval_map_show(struct seq_file *m, void *v)
5055 {
5056         union trace_eval_map_item *ptr = v;
5057
5058         seq_printf(m, "%s %ld (%s)\n",
5059                    ptr->map.eval_string, ptr->map.eval_value,
5060                    ptr->map.system);
5061
5062         return 0;
5063 }
5064
5065 static const struct seq_operations tracing_eval_map_seq_ops = {
5066         .start          = eval_map_start,
5067         .next           = eval_map_next,
5068         .stop           = eval_map_stop,
5069         .show           = eval_map_show,
5070 };
5071
5072 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5073 {
5074         if (tracing_disabled)
5075                 return -ENODEV;
5076
5077         return seq_open(filp, &tracing_eval_map_seq_ops);
5078 }
5079
5080 static const struct file_operations tracing_eval_map_fops = {
5081         .open           = tracing_eval_map_open,
5082         .read           = seq_read,
5083         .llseek         = seq_lseek,
5084         .release        = seq_release,
5085 };
5086
5087 static inline union trace_eval_map_item *
5088 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5089 {
5090         /* Return tail of array given the head */
5091         return ptr + ptr->head.length + 1;
5092 }
5093
5094 static void
5095 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5096                            int len)
5097 {
5098         struct trace_eval_map **stop;
5099         struct trace_eval_map **map;
5100         union trace_eval_map_item *map_array;
5101         union trace_eval_map_item *ptr;
5102
5103         stop = start + len;
5104
5105         /*
5106          * The trace_eval_maps list contains the maps plus a head and a tail
5107          * item, where the head holds the module and the length of the array,
5108          * and the tail holds a pointer to the next list.
5109          */
5110         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5111         if (!map_array) {
5112                 pr_warn("Unable to allocate trace eval mapping\n");
5113                 return;
5114         }
5115
5116         mutex_lock(&trace_eval_mutex);
5117
5118         if (!trace_eval_maps)
5119                 trace_eval_maps = map_array;
5120         else {
5121                 ptr = trace_eval_maps;
5122                 for (;;) {
5123                         ptr = trace_eval_jmp_to_tail(ptr);
5124                         if (!ptr->tail.next)
5125                                 break;
5126                         ptr = ptr->tail.next;
5127
5128                 }
5129                 ptr->tail.next = map_array;
5130         }
5131         map_array->head.mod = mod;
5132         map_array->head.length = len;
5133         map_array++;
5134
5135         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5136                 map_array->map = **map;
5137                 map_array++;
5138         }
5139         memset(map_array, 0, sizeof(*map_array));
5140
5141         mutex_unlock(&trace_eval_mutex);
5142 }
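/*
 * Rough layout of one chunk built by trace_insert_eval_map_file() above
 * (a sketch of how the union is used, not a definition from this file):
 *
 *   map_array[0]         head  { mod, length = len }
 *   map_array[1..len]    map   one trace_eval_map copied per entry
 *   map_array[len + 1]   tail  { next } -> next chunk, or zeroed terminator
 *
 * trace_eval_jmp_to_tail() relies on exactly this "head + length + 1"
 * spacing to hop from a chunk's head to its tail.
 */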
5143
5144 static void trace_create_eval_file(struct dentry *d_tracer)
5145 {
5146         trace_create_file("eval_map", 0444, d_tracer,
5147                           NULL, &tracing_eval_map_fops);
5148 }
5149
5150 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5151 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5152 static inline void trace_insert_eval_map_file(struct module *mod,
5153                               struct trace_eval_map **start, int len) { }
5154 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5155
5156 static void trace_insert_eval_map(struct module *mod,
5157                                   struct trace_eval_map **start, int len)
5158 {
5159         struct trace_eval_map **map;
5160
5161         if (len <= 0)
5162                 return;
5163
5164         map = start;
5165
5166         trace_event_eval_update(map, len);
5167
5168         trace_insert_eval_map_file(mod, start, len);
5169 }
5170
5171 static ssize_t
5172 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5173                        size_t cnt, loff_t *ppos)
5174 {
5175         struct trace_array *tr = filp->private_data;
5176         char buf[MAX_TRACER_SIZE+2];
5177         int r;
5178
5179         mutex_lock(&trace_types_lock);
5180         r = sprintf(buf, "%s\n", tr->current_trace->name);
5181         mutex_unlock(&trace_types_lock);
5182
5183         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5184 }
5185
5186 int tracer_init(struct tracer *t, struct trace_array *tr)
5187 {
5188         tracing_reset_online_cpus(&tr->trace_buffer);
5189         return t->init(tr);
5190 }
5191
5192 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5193 {
5194         int cpu;
5195
5196         for_each_tracing_cpu(cpu)
5197                 per_cpu_ptr(buf->data, cpu)->entries = val;
5198 }
5199
5200 #ifdef CONFIG_TRACER_MAX_TRACE
5201 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5202 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5203                                         struct trace_buffer *size_buf, int cpu_id)
5204 {
5205         int cpu, ret = 0;
5206
5207         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5208                 for_each_tracing_cpu(cpu) {
5209                         ret = ring_buffer_resize(trace_buf->buffer,
5210                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5211                         if (ret < 0)
5212                                 break;
5213                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5214                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5215                 }
5216         } else {
5217                 ret = ring_buffer_resize(trace_buf->buffer,
5218                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5219                 if (ret == 0)
5220                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5221                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5222         }
5223
5224         return ret;
5225 }
5226 #endif /* CONFIG_TRACER_MAX_TRACE */
5227
5228 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5229                                         unsigned long size, int cpu)
5230 {
5231         int ret;
5232
5233         /*
5234          * If kernel or user changes the size of the ring buffer
5235          * we use the size that was given, and we can forget about
5236          * expanding it later.
5237          */
5238         ring_buffer_expanded = true;
5239
5240         /* May be called before buffers are initialized */
5241         if (!tr->trace_buffer.buffer)
5242                 return 0;
5243
5244         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5245         if (ret < 0)
5246                 return ret;
5247
5248 #ifdef CONFIG_TRACER_MAX_TRACE
5249         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5250             !tr->current_trace->use_max_tr)
5251                 goto out;
5252
5253         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5254         if (ret < 0) {
5255                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5256                                                      &tr->trace_buffer, cpu);
5257                 if (r < 0) {
5258                         /*
5259                          * AARGH! We are left with a different
5260                          * sized max buffer!
5261                          * The max buffer is our "snapshot" buffer.
5262                          * When a tracer needs a snapshot (one of the
5263                          * latency tracers), it swaps the max buffer
5264                          * with the saved snapshot. We succeeded in
5265                          * updating the size of the main buffer, but
5266                          * failed to update the size of the max buffer.
5267                          * Then, when we tried to reset the main buffer
5268                          * to its original size, we failed there too.
5269                          * This is very unlikely to happen, but if it
5270                          * does, warn and kill all tracing.
5271                          */
5272                         WARN_ON(1);
5273                         tracing_disabled = 1;
5274                 }
5275                 return ret;
5276         }
5277
5278         if (cpu == RING_BUFFER_ALL_CPUS)
5279                 set_buffer_entries(&tr->max_buffer, size);
5280         else
5281                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5282
5283  out:
5284 #endif /* CONFIG_TRACER_MAX_TRACE */
5285
5286         if (cpu == RING_BUFFER_ALL_CPUS)
5287                 set_buffer_entries(&tr->trace_buffer, size);
5288         else
5289                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5290
5291         return ret;
5292 }
5293
5294 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5295                                           unsigned long size, int cpu_id)
5296 {
5297         int ret = size;
5298
5299         mutex_lock(&trace_types_lock);
5300
5301         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5302                 /* make sure this cpu is enabled in the tracing cpu mask */
5303                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5304                         ret = -EINVAL;
5305                         goto out;
5306                 }
5307         }
5308
5309         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5310         if (ret < 0)
5311                 ret = -ENOMEM;
5312
5313 out:
5314         mutex_unlock(&trace_types_lock);
5315
5316         return ret;
5317 }
5318
5319
5320 /**
5321  * tracing_update_buffers - used by tracing facility to expand ring buffers
5322  *
5323  * To save memory when tracing is never used on a system that has it
5324  * configured in, the ring buffers are set to a minimum size.  But once
5325  * a user starts to use the tracing facility, they need to grow
5326  * to their default size.
5327  *
5328  * This function is to be called when a tracer is about to be used.
5329  */
5330 int tracing_update_buffers(void)
5331 {
5332         int ret = 0;
5333
5334         mutex_lock(&trace_types_lock);
5335         if (!ring_buffer_expanded)
5336                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5337                                                 RING_BUFFER_ALL_CPUS);
5338         mutex_unlock(&trace_types_lock);
5339
5340         return ret;
5341 }
5342
5343 struct trace_option_dentry;
5344
5345 static void
5346 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5347
5348 /*
5349  * Used to clear out the tracer before deletion of an instance.
5350  * Must have trace_types_lock held.
5351  */
5352 static void tracing_set_nop(struct trace_array *tr)
5353 {
5354         if (tr->current_trace == &nop_trace)
5355                 return;
5356
5357         tr->current_trace->enabled--;
5358
5359         if (tr->current_trace->reset)
5360                 tr->current_trace->reset(tr);
5361
5362         tr->current_trace = &nop_trace;
5363 }
5364
5365 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5366 {
5367         /* Only enable if the directory has been created already. */
5368         if (!tr->dir)
5369                 return;
5370
5371         create_trace_option_files(tr, t);
5372 }
5373
5374 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5375 {
5376         struct tracer *t;
5377 #ifdef CONFIG_TRACER_MAX_TRACE
5378         bool had_max_tr;
5379 #endif
5380         int ret = 0;
5381
5382         mutex_lock(&trace_types_lock);
5383
5384         if (!ring_buffer_expanded) {
5385                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5386                                                 RING_BUFFER_ALL_CPUS);
5387                 if (ret < 0)
5388                         goto out;
5389                 ret = 0;
5390         }
5391
5392         for (t = trace_types; t; t = t->next) {
5393                 if (strcmp(t->name, buf) == 0)
5394                         break;
5395         }
5396         if (!t) {
5397                 ret = -EINVAL;
5398                 goto out;
5399         }
5400         if (t == tr->current_trace)
5401                 goto out;
5402
5403         /* Some tracers won't work on kernel command line */
5404         if (system_state < SYSTEM_RUNNING && t->noboot) {
5405                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5406                         t->name);
5407                 goto out;
5408         }
5409
5410         /* Some tracers are only allowed for the top level buffer */
5411         if (!trace_ok_for_array(t, tr)) {
5412                 ret = -EINVAL;
5413                 goto out;
5414         }
5415
5416         /* If trace pipe files are being read, we can't change the tracer */
5417         if (tr->current_trace->ref) {
5418                 ret = -EBUSY;
5419                 goto out;
5420         }
5421
5422         trace_branch_disable();
5423
5424         tr->current_trace->enabled--;
5425
5426         if (tr->current_trace->reset)
5427                 tr->current_trace->reset(tr);
5428
5429         /* Current trace needs to be nop_trace before synchronize_sched */
5430         tr->current_trace = &nop_trace;
5431
5432 #ifdef CONFIG_TRACER_MAX_TRACE
5433         had_max_tr = tr->allocated_snapshot;
5434
5435         if (had_max_tr && !t->use_max_tr) {
5436                 /*
5437                  * We need to make sure that the update_max_tr sees that
5438                  * current_trace changed to nop_trace to keep it from
5439                  * swapping the buffers after we resize it.
5440                  * update_max_tr() is called with interrupts disabled,
5441                  * so a synchronize_sched() is sufficient.
5442                  */
5443                 synchronize_sched();
5444                 free_snapshot(tr);
5445         }
5446 #endif
5447
5448 #ifdef CONFIG_TRACER_MAX_TRACE
5449         if (t->use_max_tr && !had_max_tr) {
5450                 ret = tracing_alloc_snapshot_instance(tr);
5451                 if (ret < 0)
5452                         goto out;
5453         }
5454 #endif
5455
5456         if (t->init) {
5457                 ret = tracer_init(t, tr);
5458                 if (ret)
5459                         goto out;
5460         }
5461
5462         tr->current_trace = t;
5463         tr->current_trace->enabled++;
5464         trace_branch_enable(tr);
5465  out:
5466         mutex_unlock(&trace_types_lock);
5467
5468         return ret;
5469 }
5470
5471 static ssize_t
5472 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5473                         size_t cnt, loff_t *ppos)
5474 {
5475         struct trace_array *tr = filp->private_data;
5476         char buf[MAX_TRACER_SIZE+1];
5477         int i;
5478         size_t ret;
5479         int err;
5480
5481         ret = cnt;
5482
5483         if (cnt > MAX_TRACER_SIZE)
5484                 cnt = MAX_TRACER_SIZE;
5485
5486         if (copy_from_user(buf, ubuf, cnt))
5487                 return -EFAULT;
5488
5489         buf[cnt] = 0;
5490
5491         /* strip trailing whitespace. */
5492         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5493                 buf[i] = 0;
5494
5495         err = tracing_set_tracer(tr, buf);
5496         if (err)
5497                 return err;
5498
5499         *ppos += ret;
5500
5501         return ret;
5502 }
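/*
 * Illustrative use of the "current_tracer" file handled above (a sketch;
 * assumes tracefs at /sys/kernel/tracing and that the tracer is built in):
 *
 *   echo function > /sys/kernel/tracing/current_tracer
 *   cat /sys/kernel/tracing/current_tracer    # -> function
 *   echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing a name that is not in trace_types fails with -EINVAL, and the
 * write is refused with -EBUSY while trace_pipe readers hold a reference.
 */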
5503
5504 static ssize_t
5505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5506                    size_t cnt, loff_t *ppos)
5507 {
5508         char buf[64];
5509         int r;
5510
5511         r = snprintf(buf, sizeof(buf), "%ld\n",
5512                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5513         if (r > sizeof(buf))
5514                 r = sizeof(buf);
5515         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5516 }
5517
5518 static ssize_t
5519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5520                     size_t cnt, loff_t *ppos)
5521 {
5522         unsigned long val;
5523         int ret;
5524
5525         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5526         if (ret)
5527                 return ret;
5528
5529         *ptr = val * 1000;
5530
5531         return cnt;
5532 }
5533
5534 static ssize_t
5535 tracing_thresh_read(struct file *filp, char __user *ubuf,
5536                     size_t cnt, loff_t *ppos)
5537 {
5538         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5539 }
5540
5541 static ssize_t
5542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5543                      size_t cnt, loff_t *ppos)
5544 {
5545         struct trace_array *tr = filp->private_data;
5546         int ret;
5547
5548         mutex_lock(&trace_types_lock);
5549         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5550         if (ret < 0)
5551                 goto out;
5552
5553         if (tr->current_trace->update_thresh) {
5554                 ret = tr->current_trace->update_thresh(tr);
5555                 if (ret < 0)
5556                         goto out;
5557         }
5558
5559         ret = cnt;
5560 out:
5561         mutex_unlock(&trace_types_lock);
5562
5563         return ret;
5564 }
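/*
 * tracing_nsecs_write() takes the user value in microseconds and stores
 * it internally in nanoseconds (val * 1000).  Illustrative use with a
 * latency tracer (mount point assumed):
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh   # 100 usecs
 *   cat /sys/kernel/tracing/tracing_thresh          # -> 100
 */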
5565
5566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5567
5568 static ssize_t
5569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5570                      size_t cnt, loff_t *ppos)
5571 {
5572         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5573 }
5574
5575 static ssize_t
5576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5577                       size_t cnt, loff_t *ppos)
5578 {
5579         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5580 }
5581
5582 #endif
5583
5584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5585 {
5586         struct trace_array *tr = inode->i_private;
5587         struct trace_iterator *iter;
5588         int ret = 0;
5589
5590         if (tracing_disabled)
5591                 return -ENODEV;
5592
5593         if (trace_array_get(tr) < 0)
5594                 return -ENODEV;
5595
5596         mutex_lock(&trace_types_lock);
5597
5598         /* create a buffer to store the information to pass to userspace */
5599         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5600         if (!iter) {
5601                 ret = -ENOMEM;
5602                 __trace_array_put(tr);
5603                 goto out;
5604         }
5605
5606         trace_seq_init(&iter->seq);
5607         iter->trace = tr->current_trace;
5608
5609         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5610                 ret = -ENOMEM;
5611                 goto fail;
5612         }
5613
5614         /* trace pipe does not show start of buffer */
5615         cpumask_setall(iter->started);
5616
5617         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5618                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5619
5620         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5621         if (trace_clocks[tr->clock_id].in_ns)
5622                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5623
5624         iter->tr = tr;
5625         iter->trace_buffer = &tr->trace_buffer;
5626         iter->cpu_file = tracing_get_cpu(inode);
5627         mutex_init(&iter->mutex);
5628         filp->private_data = iter;
5629
5630         if (iter->trace->pipe_open)
5631                 iter->trace->pipe_open(iter);
5632
5633         nonseekable_open(inode, filp);
5634
5635         tr->current_trace->ref++;
5636 out:
5637         mutex_unlock(&trace_types_lock);
5638         return ret;
5639
5640 fail:
5641         kfree(iter);
5642         __trace_array_put(tr);
5643         mutex_unlock(&trace_types_lock);
5644         return ret;
5645 }
5646
5647 static int tracing_release_pipe(struct inode *inode, struct file *file)
5648 {
5649         struct trace_iterator *iter = file->private_data;
5650         struct trace_array *tr = inode->i_private;
5651
5652         mutex_lock(&trace_types_lock);
5653
5654         tr->current_trace->ref--;
5655
5656         if (iter->trace->pipe_close)
5657                 iter->trace->pipe_close(iter);
5658
5659         mutex_unlock(&trace_types_lock);
5660
5661         free_cpumask_var(iter->started);
5662         mutex_destroy(&iter->mutex);
5663         kfree(iter);
5664
5665         trace_array_put(tr);
5666
5667         return 0;
5668 }
5669
5670 static __poll_t
5671 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5672 {
5673         struct trace_array *tr = iter->tr;
5674
5675         /* Iterators are static; they should be either filled or empty */
5676         if (trace_buffer_iter(iter, iter->cpu_file))
5677                 return EPOLLIN | EPOLLRDNORM;
5678
5679         if (tr->trace_flags & TRACE_ITER_BLOCK)
5680                 /*
5681                  * Always select as readable when in blocking mode
5682                  */
5683                 return EPOLLIN | EPOLLRDNORM;
5684         else
5685                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5686                                              filp, poll_table);
5687 }
5688
5689 static __poll_t
5690 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5691 {
5692         struct trace_iterator *iter = filp->private_data;
5693
5694         return trace_poll(iter, filp, poll_table);
5695 }
5696
5697 /* Must be called with iter->mutex held. */
5698 static int tracing_wait_pipe(struct file *filp)
5699 {
5700         struct trace_iterator *iter = filp->private_data;
5701         int ret;
5702
5703         while (trace_empty(iter)) {
5704
5705                 if ((filp->f_flags & O_NONBLOCK)) {
5706                         return -EAGAIN;
5707                 }
5708
5709                 /*
5710                  * We only return EOF once we have read something and tracing
5711                  * has been disabled.  If tracing is disabled but nothing has
5712                  * been read yet, we keep blocking.  This allows a user to cat
5713                  * this file, and then enable tracing.  But after we have read
5714                  * something, we give an EOF when tracing is disabled again.
5715                  *
5716                  * iter->pos will be 0 if we haven't read anything.
5717                  */
5718                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5719                         break;
5720
5721                 mutex_unlock(&iter->mutex);
5722
5723                 ret = wait_on_pipe(iter, false);
5724
5725                 mutex_lock(&iter->mutex);
5726
5727                 if (ret)
5728                         return ret;
5729         }
5730
5731         return 1;
5732 }
5733
5734 /*
5735  * Consumer reader.
5736  */
5737 static ssize_t
5738 tracing_read_pipe(struct file *filp, char __user *ubuf,
5739                   size_t cnt, loff_t *ppos)
5740 {
5741         struct trace_iterator *iter = filp->private_data;
5742         ssize_t sret;
5743
5744         /*
5745          * Avoid more than one consumer on a single file descriptor.
5746          * This is just a matter of trace coherency; the ring buffer
5747          * itself is protected.
5748          */
5749         mutex_lock(&iter->mutex);
5750
5751         /* return any leftover data */
5752         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5753         if (sret != -EBUSY)
5754                 goto out;
5755
5756         trace_seq_init(&iter->seq);
5757
5758         if (iter->trace->read) {
5759                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5760                 if (sret)
5761                         goto out;
5762         }
5763
5764 waitagain:
5765         sret = tracing_wait_pipe(filp);
5766         if (sret <= 0)
5767                 goto out;
5768
5769         /* stop when tracing is finished */
5770         if (trace_empty(iter)) {
5771                 sret = 0;
5772                 goto out;
5773         }
5774
5775         if (cnt >= PAGE_SIZE)
5776                 cnt = PAGE_SIZE - 1;
5777
5778         /* reset all but tr, trace, and overruns */
5779         memset(&iter->seq, 0,
5780                sizeof(struct trace_iterator) -
5781                offsetof(struct trace_iterator, seq));
5782         cpumask_clear(iter->started);
5783         trace_seq_init(&iter->seq);
5784         iter->pos = -1;
5785
5786         trace_event_read_lock();
5787         trace_access_lock(iter->cpu_file);
5788         while (trace_find_next_entry_inc(iter) != NULL) {
5789                 enum print_line_t ret;
5790                 int save_len = iter->seq.seq.len;
5791
5792                 ret = print_trace_line(iter);
5793                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5794                         /* don't print partial lines */
5795                         iter->seq.seq.len = save_len;
5796                         break;
5797                 }
5798                 if (ret != TRACE_TYPE_NO_CONSUME)
5799                         trace_consume(iter);
5800
5801                 if (trace_seq_used(&iter->seq) >= cnt)
5802                         break;
5803
5804                 /*
5805                  * Setting the full flag means we reached the trace_seq buffer
5806                  * size and should have left via the partial-line condition
5807                  * above.  One of the trace_seq_* functions is not used properly.
5808                  */
5809                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5810                           iter->ent->type);
5811         }
5812         trace_access_unlock(iter->cpu_file);
5813         trace_event_read_unlock();
5814
5815         /* Now copy what we have to the user */
5816         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5817         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5818                 trace_seq_init(&iter->seq);
5819
5820         /*
5821          * If there was nothing to send to the user, despite consuming
5822          * trace entries, go back and wait for more entries.
5823          */
5824         if (sret == -EBUSY)
5825                 goto waitagain;
5826
5827 out:
5828         mutex_unlock(&iter->mutex);
5829
5830         return sret;
5831 }
5832
5833 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5834                                      unsigned int idx)
5835 {
5836         __free_page(spd->pages[idx]);
5837 }
5838
5839 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5840         .can_merge              = 0,
5841         .confirm                = generic_pipe_buf_confirm,
5842         .release                = generic_pipe_buf_release,
5843         .steal                  = generic_pipe_buf_steal,
5844         .get                    = generic_pipe_buf_get,
5845 };
5846
5847 static size_t
5848 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5849 {
5850         size_t count;
5851         int save_len;
5852         int ret;
5853
5854         /* Seq buffer is page-sized, exactly what we need. */
5855         for (;;) {
5856                 save_len = iter->seq.seq.len;
5857                 ret = print_trace_line(iter);
5858
5859                 if (trace_seq_has_overflowed(&iter->seq)) {
5860                         iter->seq.seq.len = save_len;
5861                         break;
5862                 }
5863
5864                 /*
5865                  * This should not be hit, because PARTIAL_LINE should only
5866                  * be returned if the iter->seq overflowed.  But check it
5867                  * anyway to be safe.
5868                  */
5869                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5870                         iter->seq.seq.len = save_len;
5871                         break;
5872                 }
5873
5874                 count = trace_seq_used(&iter->seq) - save_len;
5875                 if (rem < count) {
5876                         rem = 0;
5877                         iter->seq.seq.len = save_len;
5878                         break;
5879                 }
5880
5881                 if (ret != TRACE_TYPE_NO_CONSUME)
5882                         trace_consume(iter);
5883                 rem -= count;
5884                 if (!trace_find_next_entry_inc(iter))   {
5885                         rem = 0;
5886                         iter->ent = NULL;
5887                         break;
5888                 }
5889         }
5890
5891         return rem;
5892 }
5893
5894 static ssize_t tracing_splice_read_pipe(struct file *filp,
5895                                         loff_t *ppos,
5896                                         struct pipe_inode_info *pipe,
5897                                         size_t len,
5898                                         unsigned int flags)
5899 {
5900         struct page *pages_def[PIPE_DEF_BUFFERS];
5901         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5902         struct trace_iterator *iter = filp->private_data;
5903         struct splice_pipe_desc spd = {
5904                 .pages          = pages_def,
5905                 .partial        = partial_def,
5906                 .nr_pages       = 0, /* This gets updated below. */
5907                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5908                 .ops            = &tracing_pipe_buf_ops,
5909                 .spd_release    = tracing_spd_release_pipe,
5910         };
5911         ssize_t ret;
5912         size_t rem;
5913         unsigned int i;
5914
5915         if (splice_grow_spd(pipe, &spd))
5916                 return -ENOMEM;
5917
5918         mutex_lock(&iter->mutex);
5919
5920         if (iter->trace->splice_read) {
5921                 ret = iter->trace->splice_read(iter, filp,
5922                                                ppos, pipe, len, flags);
5923                 if (ret)
5924                         goto out_err;
5925         }
5926
5927         ret = tracing_wait_pipe(filp);
5928         if (ret <= 0)
5929                 goto out_err;
5930
5931         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5932                 ret = -EFAULT;
5933                 goto out_err;
5934         }
5935
5936         trace_event_read_lock();
5937         trace_access_lock(iter->cpu_file);
5938
5939         /* Fill as many pages as possible. */
5940         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5941                 spd.pages[i] = alloc_page(GFP_KERNEL);
5942                 if (!spd.pages[i])
5943                         break;
5944
5945                 rem = tracing_fill_pipe_page(rem, iter);
5946
5947                 /* Copy the data into the page, so we can start over. */
5948                 ret = trace_seq_to_buffer(&iter->seq,
5949                                           page_address(spd.pages[i]),
5950                                           trace_seq_used(&iter->seq));
5951                 if (ret < 0) {
5952                         __free_page(spd.pages[i]);
5953                         break;
5954                 }
5955                 spd.partial[i].offset = 0;
5956                 spd.partial[i].len = trace_seq_used(&iter->seq);
5957
5958                 trace_seq_init(&iter->seq);
5959         }
5960
5961         trace_access_unlock(iter->cpu_file);
5962         trace_event_read_unlock();
5963         mutex_unlock(&iter->mutex);
5964
5965         spd.nr_pages = i;
5966
5967         if (i)
5968                 ret = splice_to_pipe(pipe, &spd);
5969         else
5970                 ret = 0;
5971 out:
5972         splice_shrink_spd(&spd);
5973         return ret;
5974
5975 out_err:
5976         mutex_unlock(&iter->mutex);
5977         goto out;
5978 }
5979
5980 static ssize_t
5981 tracing_entries_read(struct file *filp, char __user *ubuf,
5982                      size_t cnt, loff_t *ppos)
5983 {
5984         struct inode *inode = file_inode(filp);
5985         struct trace_array *tr = inode->i_private;
5986         int cpu = tracing_get_cpu(inode);
5987         char buf[64];
5988         int r = 0;
5989         ssize_t ret;
5990
5991         mutex_lock(&trace_types_lock);
5992
5993         if (cpu == RING_BUFFER_ALL_CPUS) {
5994                 int cpu, buf_size_same;
5995                 unsigned long size;
5996
5997                 size = 0;
5998                 buf_size_same = 1;
5999                 /* check if all cpu buffer sizes are the same */
6000                 for_each_tracing_cpu(cpu) {
6001                         /* fill in the size from first enabled cpu */
6002                         if (size == 0)
6003                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6004                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6005                                 buf_size_same = 0;
6006                                 break;
6007                         }
6008                 }
6009
6010                 if (buf_size_same) {
6011                         if (!ring_buffer_expanded)
6012                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6013                                             size >> 10,
6014                                             trace_buf_size >> 10);
6015                         else
6016                                 r = sprintf(buf, "%lu\n", size >> 10);
6017                 } else
6018                         r = sprintf(buf, "X\n");
6019         } else
6020                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6021
6022         mutex_unlock(&trace_types_lock);
6023
6024         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6025         return ret;
6026 }
6027
6028 static ssize_t
6029 tracing_entries_write(struct file *filp, const char __user *ubuf,
6030                       size_t cnt, loff_t *ppos)
6031 {
6032         struct inode *inode = file_inode(filp);
6033         struct trace_array *tr = inode->i_private;
6034         unsigned long val;
6035         int ret;
6036
6037         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6038         if (ret)
6039                 return ret;
6040
6041         /* must have at least 1 entry */
6042         if (!val)
6043                 return -EINVAL;
6044
6045         /* value is in KB */
6046         val <<= 10;
6047         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6048         if (ret < 0)
6049                 return ret;
6050
6051         *ppos += cnt;
6052
6053         return cnt;
6054 }
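/*
 * The "buffer_size_kb" files are backed by the read/write handlers above;
 * the written value is in KiB and is shifted up by 10 before resizing.
 * Illustrative use (tracefs mount point assumed):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb            # all CPUs
 *   echo 1408 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *   cat /sys/kernel/tracing/buffer_size_kb    # "X" if per-cpu sizes differ
 */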
6055
6056 static ssize_t
6057 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6058                                 size_t cnt, loff_t *ppos)
6059 {
6060         struct trace_array *tr = filp->private_data;
6061         char buf[64];
6062         int r, cpu;
6063         unsigned long size = 0, expanded_size = 0;
6064
6065         mutex_lock(&trace_types_lock);
6066         for_each_tracing_cpu(cpu) {
6067                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6068                 if (!ring_buffer_expanded)
6069                         expanded_size += trace_buf_size >> 10;
6070         }
6071         if (ring_buffer_expanded)
6072                 r = sprintf(buf, "%lu\n", size);
6073         else
6074                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6075         mutex_unlock(&trace_types_lock);
6076
6077         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6078 }
6079
6080 static ssize_t
6081 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6082                           size_t cnt, loff_t *ppos)
6083 {
6084         /*
6085          * There is no need to read what the user has written; this function
6086          * is just to make sure that there is no error when "echo" is used.
6087          */
6088
6089         *ppos += cnt;
6090
6091         return cnt;
6092 }
6093
6094 static int
6095 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6096 {
6097         struct trace_array *tr = inode->i_private;
6098
6099         /* disable tracing? */
6100         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6101                 tracer_tracing_off(tr);
6102         /* resize the ring buffer to 0 */
6103         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6104
6105         trace_array_put(tr);
6106
6107         return 0;
6108 }
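/*
 * A sketch of how "free_buffer" behaves with the handlers above: any write
 * is accepted, and the final close shrinks the ring buffer to zero; with
 * the "disable_on_free" option set, tracing is turned off first.
 * Example (paths assumed):
 *
 *   echo 1 > /sys/kernel/tracing/options/disable_on_free
 *   echo > /sys/kernel/tracing/free_buffer    # close triggers the release
 */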
6109
6110 static ssize_t
6111 tracing_mark_write(struct file *filp, const char __user *ubuf,
6112                                         size_t cnt, loff_t *fpos)
6113 {
6114         struct trace_array *tr = filp->private_data;
6115         struct ring_buffer_event *event;
6116         enum event_trigger_type tt = ETT_NONE;
6117         struct ring_buffer *buffer;
6118         struct print_entry *entry;
6119         unsigned long irq_flags;
6120         const char faulted[] = "<faulted>";
6121         ssize_t written;
6122         int size;
6123         int len;
6124
6125 /* Used in tracing_mark_raw_write() as well */
6126 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6127
6128         if (tracing_disabled)
6129                 return -EINVAL;
6130
6131         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6132                 return -EINVAL;
6133
6134         if (cnt > TRACE_BUF_SIZE)
6135                 cnt = TRACE_BUF_SIZE;
6136
6137         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6138
6139         local_save_flags(irq_flags);
6140         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6141
6142         /* If less than "<faulted>", then make sure we can still add that */
6143         if (cnt < FAULTED_SIZE)
6144                 size += FAULTED_SIZE - cnt;
6145
6146         buffer = tr->trace_buffer.buffer;
6147         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6148                                             irq_flags, preempt_count());
6149         if (unlikely(!event))
6150                 /* Ring buffer disabled, return as if not open for write */
6151                 return -EBADF;
6152
6153         entry = ring_buffer_event_data(event);
6154         entry->ip = _THIS_IP_;
6155
6156         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6157         if (len) {
6158                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6159                 cnt = FAULTED_SIZE;
6160                 written = -EFAULT;
6161         } else
6162                 written = cnt;
6163         len = cnt;
6164
6165         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6166                 /* do not add \n before testing triggers, but add \0 */
6167                 entry->buf[cnt] = '\0';
6168                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6169         }
6170
6171         if (entry->buf[cnt - 1] != '\n') {
6172                 entry->buf[cnt] = '\n';
6173                 entry->buf[cnt + 1] = '\0';
6174         } else
6175                 entry->buf[cnt] = '\0';
6176
6177         __buffer_unlock_commit(buffer, event);
6178
6179         if (tt)
6180                 event_triggers_post_call(tr->trace_marker_file, tt);
6181
6182         if (written > 0)
6183                 *fpos += written;
6184
6185         return written;
6186 }
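/*
 * Illustrative use of the "trace_marker" file written above (a sketch;
 * assumes tracefs at /sys/kernel/tracing and the "markers" trace option
 * set, which is the default):
 *
 *   echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace as a print event; if the user page
 * faults during the copy, "<faulted>" is recorded instead and -EFAULT
 * is returned.
 */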
6187
6188 /* Limit it for now to 3K (including tag) */
6189 #define RAW_DATA_MAX_SIZE (1024*3)
6190
6191 static ssize_t
6192 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6193                                         size_t cnt, loff_t *fpos)
6194 {
6195         struct trace_array *tr = filp->private_data;
6196         struct ring_buffer_event *event;
6197         struct ring_buffer *buffer;
6198         struct raw_data_entry *entry;
6199         const char faulted[] = "<faulted>";
6200         unsigned long irq_flags;
6201         ssize_t written;
6202         int size;
6203         int len;
6204
6205 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6206
6207         if (tracing_disabled)
6208                 return -EINVAL;
6209
6210         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6211                 return -EINVAL;
6212
6213         /* The marker must at least have a tag id */
6214         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6215                 return -EINVAL;
6216
6217         if (cnt > TRACE_BUF_SIZE)
6218                 cnt = TRACE_BUF_SIZE;
6219
6220         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6221
6222         local_save_flags(irq_flags);
6223         size = sizeof(*entry) + cnt;
6224         if (cnt < FAULT_SIZE_ID)
6225                 size += FAULT_SIZE_ID - cnt;
6226
6227         buffer = tr->trace_buffer.buffer;
6228         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6229                                             irq_flags, preempt_count());
6230         if (!event)
6231                 /* Ring buffer disabled, return as if not open for write */
6232                 return -EBADF;
6233
6234         entry = ring_buffer_event_data(event);
6235
6236         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6237         if (len) {
6238                 entry->id = -1;
6239                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6240                 written = -EFAULT;
6241         } else
6242                 written = cnt;
6243
6244         __buffer_unlock_commit(buffer, event);
6245
6246         if (written > 0)
6247                 *fpos += written;
6248
6249         return written;
6250 }
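/*
 * A minimal userspace sketch for "trace_marker_raw" (illustrative only;
 * the id value and payload are application-defined and the usual
 * unistd.h/fcntl.h includes are omitted):
 *
 *   struct { unsigned int id; char payload[8]; } rec = { 42, "rawdata" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));   // first 4 bytes become entry->id
 *   close(fd);
 *
 * Writes shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL, as checked above.
 */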
6251
6252 static int tracing_clock_show(struct seq_file *m, void *v)
6253 {
6254         struct trace_array *tr = m->private;
6255         int i;
6256
6257         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6258                 seq_printf(m,
6259                         "%s%s%s%s", i ? " " : "",
6260                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6261                         i == tr->clock_id ? "]" : "");
6262         seq_putc(m, '\n');
6263
6264         return 0;
6265 }
6266
6267 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6268 {
6269         int i;
6270
6271         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6272                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6273                         break;
6274         }
6275         if (i == ARRAY_SIZE(trace_clocks))
6276                 return -EINVAL;
6277
6278         mutex_lock(&trace_types_lock);
6279
6280         tr->clock_id = i;
6281
6282         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6283
6284         /*
6285          * New clock may not be consistent with the previous clock.
6286          * Reset the buffer so that it doesn't have incomparable timestamps.
6287          */
6288         tracing_reset_online_cpus(&tr->trace_buffer);
6289
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291         if (tr->max_buffer.buffer)
6292                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6293         tracing_reset_online_cpus(&tr->max_buffer);
6294 #endif
6295
6296         mutex_unlock(&trace_types_lock);
6297
6298         return 0;
6299 }
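/*
 * Illustrative use of the "trace_clock" file that ends up in
 * tracing_set_clock() above (clock names come from trace_clocks[];
 * mount point assumed):
 *
 *   cat /sys/kernel/tracing/trace_clock   # e.g. "[local] global ... mono ..."
 *   echo mono > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the online-cpu buffers, as noted above, so
 * previously recorded entries are discarded.
 */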
6300
6301 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6302                                    size_t cnt, loff_t *fpos)
6303 {
6304         struct seq_file *m = filp->private_data;
6305         struct trace_array *tr = m->private;
6306         char buf[64];
6307         const char *clockstr;
6308         int ret;
6309
6310         if (cnt >= sizeof(buf))
6311                 return -EINVAL;
6312
6313         if (copy_from_user(buf, ubuf, cnt))
6314                 return -EFAULT;
6315
6316         buf[cnt] = 0;
6317
6318         clockstr = strstrip(buf);
6319
6320         ret = tracing_set_clock(tr, clockstr);
6321         if (ret)
6322                 return ret;
6323
6324         *fpos += cnt;
6325
6326         return cnt;
6327 }
6328
6329 static int tracing_clock_open(struct inode *inode, struct file *file)
6330 {
6331         struct trace_array *tr = inode->i_private;
6332         int ret;
6333
6334         if (tracing_disabled)
6335                 return -ENODEV;
6336
6337         if (trace_array_get(tr))
6338                 return -ENODEV;
6339
6340         ret = single_open(file, tracing_clock_show, inode->i_private);
6341         if (ret < 0)
6342                 trace_array_put(tr);
6343
6344         return ret;
6345 }
6346
6347 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6348 {
6349         struct trace_array *tr = m->private;
6350
6351         mutex_lock(&trace_types_lock);
6352
6353         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6354                 seq_puts(m, "delta [absolute]\n");
6355         else
6356                 seq_puts(m, "[delta] absolute\n");
6357
6358         mutex_unlock(&trace_types_lock);
6359
6360         return 0;
6361 }
6362
6363 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6364 {
6365         struct trace_array *tr = inode->i_private;
6366         int ret;
6367
6368         if (tracing_disabled)
6369                 return -ENODEV;
6370
6371         if (trace_array_get(tr))
6372                 return -ENODEV;
6373
6374         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6375         if (ret < 0)
6376                 trace_array_put(tr);
6377
6378         return ret;
6379 }
6380
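/*
 * Switch the instance's ring buffers between delta and absolute
 * timestamps. The absolute mode is reference counted: every user that
 * requests it bumps time_stamp_abs_ref, and delta mode is restored only
 * when the last such user drops its reference again.
 */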
6381 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6382 {
6383         int ret = 0;
6384
6385         mutex_lock(&trace_types_lock);
6386
6387         if (abs && tr->time_stamp_abs_ref++)
6388                 goto out;
6389
6390         if (!abs) {
6391                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6392                         ret = -EINVAL;
6393                         goto out;
6394                 }
6395
6396                 if (--tr->time_stamp_abs_ref)
6397                         goto out;
6398         }
6399
6400         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6401
6402 #ifdef CONFIG_TRACER_MAX_TRACE
6403         if (tr->max_buffer.buffer)
6404                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6405 #endif
6406  out:
6407         mutex_unlock(&trace_types_lock);
6408
6409         return ret;
6410 }
6411
6412 struct ftrace_buffer_info {
6413         struct trace_iterator   iter;
6414         void                    *spare;
6415         unsigned int            spare_cpu;
6416         unsigned int            read;
6417 };
6418
6419 #ifdef CONFIG_TRACER_SNAPSHOT
6420 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6421 {
6422         struct trace_array *tr = inode->i_private;
6423         struct trace_iterator *iter;
6424         struct seq_file *m;
6425         int ret = 0;
6426
6427         if (trace_array_get(tr) < 0)
6428                 return -ENODEV;
6429
6430         if (file->f_mode & FMODE_READ) {
6431                 iter = __tracing_open(inode, file, true);
6432                 if (IS_ERR(iter))
6433                         ret = PTR_ERR(iter);
6434         } else {
6435                 /* Writes still need the seq_file to hold the private data */
6436                 ret = -ENOMEM;
6437                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6438                 if (!m)
6439                         goto out;
6440                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6441                 if (!iter) {
6442                         kfree(m);
6443                         goto out;
6444                 }
6445                 ret = 0;
6446
6447                 iter->tr = tr;
6448                 iter->trace_buffer = &tr->max_buffer;
6449                 iter->cpu_file = tracing_get_cpu(inode);
6450                 m->private = iter;
6451                 file->private_data = m;
6452         }
6453 out:
6454         if (ret < 0)
6455                 trace_array_put(tr);
6456
6457         return ret;
6458 }
6459
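/*
 * Semantics of a write to the "snapshot" file, as implemented below:
 *
 *   0     - free the snapshot buffer (only valid for the whole-buffer
 *           file, not the per-cpu ones)
 *   1     - allocate the snapshot buffer if necessary and take a
 *           snapshot, i.e. swap it with the live trace buffer
 *   other - clear the snapshot buffer without freeing it
 */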
6460 static ssize_t
6461 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6462                        loff_t *ppos)
6463 {
6464         struct seq_file *m = filp->private_data;
6465         struct trace_iterator *iter = m->private;
6466         struct trace_array *tr = iter->tr;
6467         unsigned long val;
6468         int ret;
6469
6470         ret = tracing_update_buffers();
6471         if (ret < 0)
6472                 return ret;
6473
6474         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6475         if (ret)
6476                 return ret;
6477
6478         mutex_lock(&trace_types_lock);
6479
6480         if (tr->current_trace->use_max_tr) {
6481                 ret = -EBUSY;
6482                 goto out;
6483         }
6484
6485         switch (val) {
6486         case 0:
6487                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6488                         ret = -EINVAL;
6489                         break;
6490                 }
6491                 if (tr->allocated_snapshot)
6492                         free_snapshot(tr);
6493                 break;
6494         case 1:
6495 /* Only allow per-cpu swap if the ring buffer supports it */
6496 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6497                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6498                         ret = -EINVAL;
6499                         break;
6500                 }
6501 #endif
6502                 if (tr->allocated_snapshot)
6503                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6504                                         &tr->trace_buffer, iter->cpu_file);
6505                 else
6506                         ret = tracing_alloc_snapshot_instance(tr);
6507                 if (ret < 0)
6508                         break;
6509                 local_irq_disable();
6510                 /* Now, we're going to swap */
6511                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6512                         update_max_tr(tr, current, smp_processor_id());
6513                 else
6514                         update_max_tr_single(tr, current, iter->cpu_file);
6515                 local_irq_enable();
6516                 break;
6517         default:
6518                 if (tr->allocated_snapshot) {
6519                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6520                                 tracing_reset_online_cpus(&tr->max_buffer);
6521                         else
6522                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6523                 }
6524                 break;
6525         }
6526
6527         if (ret >= 0) {
6528                 *ppos += cnt;
6529                 ret = cnt;
6530         }
6531 out:
6532         mutex_unlock(&trace_types_lock);
6533         return ret;
6534 }
6535
6536 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6537 {
6538         struct seq_file *m = file->private_data;
6539         int ret;
6540
6541         ret = tracing_release(inode, file);
6542
6543         if (file->f_mode & FMODE_READ)
6544                 return ret;
6545
6546         /* If write only, the seq_file is just a stub */
6547         if (m)
6548                 kfree(m->private);
6549         kfree(m);
6550
6551         return 0;
6552 }
6553
6554 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6555 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6556                                     size_t count, loff_t *ppos);
6557 static int tracing_buffers_release(struct inode *inode, struct file *file);
6558 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6559                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6560
6561 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6562 {
6563         struct ftrace_buffer_info *info;
6564         int ret;
6565
6566         ret = tracing_buffers_open(inode, filp);
6567         if (ret < 0)
6568                 return ret;
6569
6570         info = filp->private_data;
6571
6572         if (info->iter.trace->use_max_tr) {
6573                 tracing_buffers_release(inode, filp);
6574                 return -EBUSY;
6575         }
6576
6577         info->iter.snapshot = true;
6578         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6579
6580         return ret;
6581 }
6582
6583 #endif /* CONFIG_TRACER_SNAPSHOT */
6584
6585
6586 static const struct file_operations tracing_thresh_fops = {
6587         .open           = tracing_open_generic,
6588         .read           = tracing_thresh_read,
6589         .write          = tracing_thresh_write,
6590         .llseek         = generic_file_llseek,
6591 };
6592
6593 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6594 static const struct file_operations tracing_max_lat_fops = {
6595         .open           = tracing_open_generic,
6596         .read           = tracing_max_lat_read,
6597         .write          = tracing_max_lat_write,
6598         .llseek         = generic_file_llseek,
6599 };
6600 #endif
6601
6602 static const struct file_operations set_tracer_fops = {
6603         .open           = tracing_open_generic,
6604         .read           = tracing_set_trace_read,
6605         .write          = tracing_set_trace_write,
6606         .llseek         = generic_file_llseek,
6607 };
6608
6609 static const struct file_operations tracing_pipe_fops = {
6610         .open           = tracing_open_pipe,
6611         .poll           = tracing_poll_pipe,
6612         .read           = tracing_read_pipe,
6613         .splice_read    = tracing_splice_read_pipe,
6614         .release        = tracing_release_pipe,
6615         .llseek         = no_llseek,
6616 };
6617
6618 static const struct file_operations tracing_entries_fops = {
6619         .open           = tracing_open_generic_tr,
6620         .read           = tracing_entries_read,
6621         .write          = tracing_entries_write,
6622         .llseek         = generic_file_llseek,
6623         .release        = tracing_release_generic_tr,
6624 };
6625
6626 static const struct file_operations tracing_total_entries_fops = {
6627         .open           = tracing_open_generic_tr,
6628         .read           = tracing_total_entries_read,
6629         .llseek         = generic_file_llseek,
6630         .release        = tracing_release_generic_tr,
6631 };
6632
6633 static const struct file_operations tracing_free_buffer_fops = {
6634         .open           = tracing_open_generic_tr,
6635         .write          = tracing_free_buffer_write,
6636         .release        = tracing_free_buffer_release,
6637 };
6638
6639 static const struct file_operations tracing_mark_fops = {
6640         .open           = tracing_open_generic_tr,
6641         .write          = tracing_mark_write,
6642         .llseek         = generic_file_llseek,
6643         .release        = tracing_release_generic_tr,
6644 };
6645
6646 static const struct file_operations tracing_mark_raw_fops = {
6647         .open           = tracing_open_generic_tr,
6648         .write          = tracing_mark_raw_write,
6649         .llseek         = generic_file_llseek,
6650         .release        = tracing_release_generic_tr,
6651 };
6652
6653 static const struct file_operations trace_clock_fops = {
6654         .open           = tracing_clock_open,
6655         .read           = seq_read,
6656         .llseek         = seq_lseek,
6657         .release        = tracing_single_release_tr,
6658         .write          = tracing_clock_write,
6659 };
6660
6661 static const struct file_operations trace_time_stamp_mode_fops = {
6662         .open           = tracing_time_stamp_mode_open,
6663         .read           = seq_read,
6664         .llseek         = seq_lseek,
6665         .release        = tracing_single_release_tr,
6666 };
6667
6668 #ifdef CONFIG_TRACER_SNAPSHOT
6669 static const struct file_operations snapshot_fops = {
6670         .open           = tracing_snapshot_open,
6671         .read           = seq_read,
6672         .write          = tracing_snapshot_write,
6673         .llseek         = tracing_lseek,
6674         .release        = tracing_snapshot_release,
6675 };
6676
6677 static const struct file_operations snapshot_raw_fops = {
6678         .open           = snapshot_raw_open,
6679         .read           = tracing_buffers_read,
6680         .release        = tracing_buffers_release,
6681         .splice_read    = tracing_buffers_splice_read,
6682         .llseek         = no_llseek,
6683 };
6684
6685 #endif /* CONFIG_TRACER_SNAPSHOT */
6686
6687 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6688 {
6689         struct trace_array *tr = inode->i_private;
6690         struct ftrace_buffer_info *info;
6691         int ret;
6692
6693         if (tracing_disabled)
6694                 return -ENODEV;
6695
6696         if (trace_array_get(tr) < 0)
6697                 return -ENODEV;
6698
6699         info = kzalloc(sizeof(*info), GFP_KERNEL);
6700         if (!info) {
6701                 trace_array_put(tr);
6702                 return -ENOMEM;
6703         }
6704
6705         mutex_lock(&trace_types_lock);
6706
6707         info->iter.tr           = tr;
6708         info->iter.cpu_file     = tracing_get_cpu(inode);
6709         info->iter.trace        = tr->current_trace;
6710         info->iter.trace_buffer = &tr->trace_buffer;
6711         info->spare             = NULL;
6712         /* Force reading ring buffer for first read */
6713         info->read              = (unsigned int)-1;
6714
6715         filp->private_data = info;
6716
6717         tr->current_trace->ref++;
6718
6719         mutex_unlock(&trace_types_lock);
6720
6721         ret = nonseekable_open(inode, filp);
6722         if (ret < 0)
6723                 trace_array_put(tr);
6724
6725         return ret;
6726 }
6727
6728 static __poll_t
6729 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6730 {
6731         struct ftrace_buffer_info *info = filp->private_data;
6732         struct trace_iterator *iter = &info->iter;
6733
6734         return trace_poll(iter, filp, poll_table);
6735 }
6736
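/*
 * Read side of the per-cpu "trace_pipe_raw" (and "snapshot_raw") files:
 * events are handed to user space as raw ring-buffer pages (the binary
 * sub-buffer format) through a single spare page, rather than as
 * formatted text.
 */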
6737 static ssize_t
6738 tracing_buffers_read(struct file *filp, char __user *ubuf,
6739                      size_t count, loff_t *ppos)
6740 {
6741         struct ftrace_buffer_info *info = filp->private_data;
6742         struct trace_iterator *iter = &info->iter;
6743         ssize_t ret = 0;
6744         ssize_t size;
6745
6746         if (!count)
6747                 return 0;
6748
6749 #ifdef CONFIG_TRACER_MAX_TRACE
6750         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6751                 return -EBUSY;
6752 #endif
6753
6754         if (!info->spare) {
6755                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6756                                                           iter->cpu_file);
6757                 if (IS_ERR(info->spare)) {
6758                         ret = PTR_ERR(info->spare);
6759                         info->spare = NULL;
6760                 } else {
6761                         info->spare_cpu = iter->cpu_file;
6762                 }
6763         }
6764         if (!info->spare)
6765                 return ret;
6766
6767         /* Do we have previous read data to read? */
6768         if (info->read < PAGE_SIZE)
6769                 goto read;
6770
6771  again:
6772         trace_access_lock(iter->cpu_file);
6773         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6774                                     &info->spare,
6775                                     count,
6776                                     iter->cpu_file, 0);
6777         trace_access_unlock(iter->cpu_file);
6778
6779         if (ret < 0) {
6780                 if (trace_empty(iter)) {
6781                         if ((filp->f_flags & O_NONBLOCK))
6782                                 return -EAGAIN;
6783
6784                         ret = wait_on_pipe(iter, false);
6785                         if (ret)
6786                                 return ret;
6787
6788                         goto again;
6789                 }
6790                 return 0;
6791         }
6792
6793         info->read = 0;
6794  read:
6795         size = PAGE_SIZE - info->read;
6796         if (size > count)
6797                 size = count;
6798
6799         ret = copy_to_user(ubuf, info->spare + info->read, size);
6800         if (ret == size)
6801                 return -EFAULT;
6802
6803         size -= ret;
6804
6805         *ppos += size;
6806         info->read += size;
6807
6808         return size;
6809 }
6810
6811 static int tracing_buffers_release(struct inode *inode, struct file *file)
6812 {
6813         struct ftrace_buffer_info *info = file->private_data;
6814         struct trace_iterator *iter = &info->iter;
6815
6816         mutex_lock(&trace_types_lock);
6817
6818         iter->tr->current_trace->ref--;
6819
6820         __trace_array_put(iter->tr);
6821
6822         if (info->spare)
6823                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6824                                            info->spare_cpu, info->spare);
6825         kfree(info);
6826
6827         mutex_unlock(&trace_types_lock);
6828
6829         return 0;
6830 }
6831
6832 struct buffer_ref {
6833         struct ring_buffer      *buffer;
6834         void                    *page;
6835         int                     cpu;
6836         refcount_t              refcount;
6837 };
6838
6839 static void buffer_ref_release(struct buffer_ref *ref)
6840 {
6841         if (!refcount_dec_and_test(&ref->refcount))
6842                 return;
6843         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6844         kfree(ref);
6845 }
6846
6847 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6848                                     struct pipe_buffer *buf)
6849 {
6850         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6851
6852         buffer_ref_release(ref);
6853         buf->private = 0;
6854 }
6855
6856 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6857                                 struct pipe_buffer *buf)
6858 {
6859         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6860
6861         if (refcount_read(&ref->refcount) > INT_MAX/2)
6862                 return false;
6863
6864         refcount_inc(&ref->refcount);
6865         return true;
6866 }
6867
6868 /* Pipe buffer operations for a buffer. */
6869 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6870         .can_merge              = 0,
6871         .confirm                = generic_pipe_buf_confirm,
6872         .release                = buffer_pipe_buf_release,
6873         .steal                  = generic_pipe_buf_nosteal,
6874         .get                    = buffer_pipe_buf_get,
6875 };
6876
6877 /*
6878  * Callback from splice_to_pipe(): release any pages still held in the
6879  * spd if we errored out while filling the pipe.
6880  */
6881 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6882 {
6883         struct buffer_ref *ref =
6884                 (struct buffer_ref *)spd->partial[i].private;
6885
6886         buffer_ref_release(ref);
6887         spd->partial[i].private = 0;
6888 }
6889
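/*
 * Splice ring-buffer pages into a pipe without copying: every page read
 * from the ring buffer is wrapped in a refcounted buffer_ref and handed
 * to the pipe via buffer_pipe_buf_ops. The page goes back to the ring
 * buffer only once the last reference is dropped (buffer_ref_release()).
 */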
6890 static ssize_t
6891 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6892                             struct pipe_inode_info *pipe, size_t len,
6893                             unsigned int flags)
6894 {
6895         struct ftrace_buffer_info *info = file->private_data;
6896         struct trace_iterator *iter = &info->iter;
6897         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6898         struct page *pages_def[PIPE_DEF_BUFFERS];
6899         struct splice_pipe_desc spd = {
6900                 .pages          = pages_def,
6901                 .partial        = partial_def,
6902                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6903                 .ops            = &buffer_pipe_buf_ops,
6904                 .spd_release    = buffer_spd_release,
6905         };
6906         struct buffer_ref *ref;
6907         int entries, i;
6908         ssize_t ret = 0;
6909
6910 #ifdef CONFIG_TRACER_MAX_TRACE
6911         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6912                 return -EBUSY;
6913 #endif
6914
6915         if (*ppos & (PAGE_SIZE - 1))
6916                 return -EINVAL;
6917
6918         if (len & (PAGE_SIZE - 1)) {
6919                 if (len < PAGE_SIZE)
6920                         return -EINVAL;
6921                 len &= PAGE_MASK;
6922         }
6923
6924         if (splice_grow_spd(pipe, &spd))
6925                 return -ENOMEM;
6926
6927  again:
6928         trace_access_lock(iter->cpu_file);
6929         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6930
6931         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6932                 struct page *page;
6933                 int r;
6934
6935                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6936                 if (!ref) {
6937                         ret = -ENOMEM;
6938                         break;
6939                 }
6940
6941                 refcount_set(&ref->refcount, 1);
6942                 ref->buffer = iter->trace_buffer->buffer;
6943                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6944                 if (IS_ERR(ref->page)) {
6945                         ret = PTR_ERR(ref->page);
6946                         ref->page = NULL;
6947                         kfree(ref);
6948                         break;
6949                 }
6950                 ref->cpu = iter->cpu_file;
6951
6952                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6953                                           len, iter->cpu_file, 1);
6954                 if (r < 0) {
6955                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6956                                                    ref->page);
6957                         kfree(ref);
6958                         break;
6959                 }
6960
6961                 page = virt_to_page(ref->page);
6962
6963                 spd.pages[i] = page;
6964                 spd.partial[i].len = PAGE_SIZE;
6965                 spd.partial[i].offset = 0;
6966                 spd.partial[i].private = (unsigned long)ref;
6967                 spd.nr_pages++;
6968                 *ppos += PAGE_SIZE;
6969
6970                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6971         }
6972
6973         trace_access_unlock(iter->cpu_file);
6974         spd.nr_pages = i;
6975
6976         /* did we read anything? */
6977         if (!spd.nr_pages) {
6978                 if (ret)
6979                         goto out;
6980
6981                 ret = -EAGAIN;
6982                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6983                         goto out;
6984
6985                 ret = wait_on_pipe(iter, true);
6986                 if (ret)
6987                         goto out;
6988
6989                 goto again;
6990         }
6991
6992         ret = splice_to_pipe(pipe, &spd);
6993 out:
6994         splice_shrink_spd(&spd);
6995
6996         return ret;
6997 }
6998
6999 static const struct file_operations tracing_buffers_fops = {
7000         .open           = tracing_buffers_open,
7001         .read           = tracing_buffers_read,
7002         .poll           = tracing_buffers_poll,
7003         .release        = tracing_buffers_release,
7004         .splice_read    = tracing_buffers_splice_read,
7005         .llseek         = no_llseek,
7006 };
7007
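/*
 * Per-cpu "stats" file: reports the entry, overrun, commit overrun, byte,
 * dropped-event and read-event counts for one CPU's buffer, plus the
 * oldest event timestamp and the current timestamp (shown as
 * seconds.microseconds when the trace clock counts in nanoseconds).
 */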
7008 static ssize_t
7009 tracing_stats_read(struct file *filp, char __user *ubuf,
7010                    size_t count, loff_t *ppos)
7011 {
7012         struct inode *inode = file_inode(filp);
7013         struct trace_array *tr = inode->i_private;
7014         struct trace_buffer *trace_buf = &tr->trace_buffer;
7015         int cpu = tracing_get_cpu(inode);
7016         struct trace_seq *s;
7017         unsigned long cnt;
7018         unsigned long long t;
7019         unsigned long usec_rem;
7020
7021         s = kmalloc(sizeof(*s), GFP_KERNEL);
7022         if (!s)
7023                 return -ENOMEM;
7024
7025         trace_seq_init(s);
7026
7027         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7028         trace_seq_printf(s, "entries: %ld\n", cnt);
7029
7030         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7031         trace_seq_printf(s, "overrun: %ld\n", cnt);
7032
7033         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7034         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7035
7036         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7037         trace_seq_printf(s, "bytes: %ld\n", cnt);
7038
7039         if (trace_clocks[tr->clock_id].in_ns) {
7040                 /* local or global for trace_clock */
7041                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7042                 usec_rem = do_div(t, USEC_PER_SEC);
7043                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7044                                                                 t, usec_rem);
7045
7046                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7047                 usec_rem = do_div(t, USEC_PER_SEC);
7048                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7049         } else {
7050                 /* counter or tsc mode for trace_clock */
7051                 trace_seq_printf(s, "oldest event ts: %llu\n",
7052                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7053
7054                 trace_seq_printf(s, "now ts: %llu\n",
7055                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7056         }
7057
7058         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7059         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7060
7061         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7062         trace_seq_printf(s, "read events: %ld\n", cnt);
7063
7064         count = simple_read_from_buffer(ubuf, count, ppos,
7065                                         s->buffer, trace_seq_used(s));
7066
7067         kfree(s);
7068
7069         return count;
7070 }
7071
7072 static const struct file_operations tracing_stats_fops = {
7073         .open           = tracing_open_generic_tr,
7074         .read           = tracing_stats_read,
7075         .llseek         = generic_file_llseek,
7076         .release        = tracing_release_generic_tr,
7077 };
7078
7079 #ifdef CONFIG_DYNAMIC_FTRACE
7080
7081 static ssize_t
7082 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7083                   size_t cnt, loff_t *ppos)
7084 {
7085         unsigned long *p = filp->private_data;
7086         char buf[64]; /* Not too big for a shallow stack */
7087         int r;
7088
7089         r = scnprintf(buf, 63, "%ld", *p);
7090         buf[r++] = '\n';
7091
7092         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7093 }
7094
7095 static const struct file_operations tracing_dyn_info_fops = {
7096         .open           = tracing_open_generic,
7097         .read           = tracing_read_dyn_info,
7098         .llseek         = generic_file_llseek,
7099 };
7100 #endif /* CONFIG_DYNAMIC_FTRACE */
7101
7102 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7103 static void
7104 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7105                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7106                 void *data)
7107 {
7108         tracing_snapshot_instance(tr);
7109 }
7110
7111 static void
7112 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7113                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7114                       void *data)
7115 {
7116         struct ftrace_func_mapper *mapper = data;
7117         long *count = NULL;
7118
7119         if (mapper)
7120                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7121
7122         if (count) {
7123
7124                 if (*count <= 0)
7125                         return;
7126
7127                 (*count)--;
7128         }
7129
7130         tracing_snapshot_instance(tr);
7131 }
7132
7133 static int
7134 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7135                       struct ftrace_probe_ops *ops, void *data)
7136 {
7137         struct ftrace_func_mapper *mapper = data;
7138         long *count = NULL;
7139
7140         seq_printf(m, "%ps:", (void *)ip);
7141
7142         seq_puts(m, "snapshot");
7143
7144         if (mapper)
7145                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7146
7147         if (count)
7148                 seq_printf(m, ":count=%ld\n", *count);
7149         else
7150                 seq_puts(m, ":unlimited\n");
7151
7152         return 0;
7153 }
7154
7155 static int
7156 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7157                      unsigned long ip, void *init_data, void **data)
7158 {
7159         struct ftrace_func_mapper *mapper = *data;
7160
7161         if (!mapper) {
7162                 mapper = allocate_ftrace_func_mapper();
7163                 if (!mapper)
7164                         return -ENOMEM;
7165                 *data = mapper;
7166         }
7167
7168         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7169 }
7170
7171 static void
7172 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7173                      unsigned long ip, void *data)
7174 {
7175         struct ftrace_func_mapper *mapper = data;
7176
7177         if (!ip) {
7178                 if (!mapper)
7179                         return;
7180                 free_ftrace_func_mapper(mapper, NULL);
7181                 return;
7182         }
7183
7184         ftrace_func_mapper_remove_ip(mapper, ip);
7185 }
7186
7187 static struct ftrace_probe_ops snapshot_probe_ops = {
7188         .func                   = ftrace_snapshot,
7189         .print                  = ftrace_snapshot_print,
7190 };
7191
7192 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7193         .func                   = ftrace_count_snapshot,
7194         .print                  = ftrace_snapshot_print,
7195         .init                   = ftrace_snapshot_init,
7196         .free                   = ftrace_snapshot_free,
7197 };
7198
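/*
 * Handler for the "snapshot" command of set_ftrace_filter. A usage sketch
 * (paths assume tracefs is mounted at /sys/kernel/debug/tracing):
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter      snapshot on every hit
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter    only the first 5 hits
 *   # echo '!schedule:snapshot' > set_ftrace_filter     remove the probe
 *
 * The optional ":count" is parsed below and carried in the probe's data
 * pointer; ftrace_count_snapshot() decrements it on each hit.
 */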
7199 static int
7200 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7201                                char *glob, char *cmd, char *param, int enable)
7202 {
7203         struct ftrace_probe_ops *ops;
7204         void *count = (void *)-1;
7205         char *number;
7206         int ret;
7207
7208         if (!tr)
7209                 return -ENODEV;
7210
7211         /* hash funcs only work with set_ftrace_filter */
7212         if (!enable)
7213                 return -EINVAL;
7214
7215         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7216
7217         if (glob[0] == '!')
7218                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7219
7220         if (!param)
7221                 goto out_reg;
7222
7223         number = strsep(&param, ":");
7224
7225         if (!strlen(number))
7226                 goto out_reg;
7227
7228         /*
7229          * We use the callback data field (which is a pointer)
7230          * as our counter.
7231          */
7232         ret = kstrtoul(number, 0, (unsigned long *)&count);
7233         if (ret)
7234                 return ret;
7235
7236  out_reg:
7237         ret = tracing_alloc_snapshot_instance(tr);
7238         if (ret < 0)
7239                 goto out;
7240
7241         ret = register_ftrace_function_probe(glob, tr, ops, count);
7242
7243  out:
7244         return ret < 0 ? ret : 0;
7245 }
7246
7247 static struct ftrace_func_command ftrace_snapshot_cmd = {
7248         .name                   = "snapshot",
7249         .func                   = ftrace_trace_snapshot_callback,
7250 };
7251
7252 static __init int register_snapshot_cmd(void)
7253 {
7254         return register_ftrace_command(&ftrace_snapshot_cmd);
7255 }
7256 #else
7257 static inline __init int register_snapshot_cmd(void) { return 0; }
7258 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7259
7260 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7261 {
7262         if (WARN_ON(!tr->dir))
7263                 return ERR_PTR(-ENODEV);
7264
7265         /* Top directory uses NULL as the parent */
7266         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7267                 return NULL;
7268
7269         /* All sub buffers have a descriptor */
7270         return tr->dir;
7271 }
7272
7273 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7274 {
7275         struct dentry *d_tracer;
7276
7277         if (tr->percpu_dir)
7278                 return tr->percpu_dir;
7279
7280         d_tracer = tracing_get_dentry(tr);
7281         if (IS_ERR(d_tracer))
7282                 return NULL;
7283
7284         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7285
7286         WARN_ONCE(!tr->percpu_dir,
7287                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7288
7289         return tr->percpu_dir;
7290 }
7291
7292 static struct dentry *
7293 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7294                       void *data, long cpu, const struct file_operations *fops)
7295 {
7296         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7297
7298         if (ret) /* See tracing_get_cpu() */
7299                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7300         return ret;
7301 }
7302
7303 static void
7304 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7305 {
7306         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7307         struct dentry *d_cpu;
7308         char cpu_dir[30]; /* 30 characters should be more than enough */
7309
7310         if (!d_percpu)
7311                 return;
7312
7313         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7314         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7315         if (!d_cpu) {
7316                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7317                 return;
7318         }
7319
7320         /* per cpu trace_pipe */
7321         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7322                                 tr, cpu, &tracing_pipe_fops);
7323
7324         /* per cpu trace */
7325         trace_create_cpu_file("trace", 0644, d_cpu,
7326                                 tr, cpu, &tracing_fops);
7327
7328         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7329                                 tr, cpu, &tracing_buffers_fops);
7330
7331         trace_create_cpu_file("stats", 0444, d_cpu,
7332                                 tr, cpu, &tracing_stats_fops);
7333
7334         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7335                                 tr, cpu, &tracing_entries_fops);
7336
7337 #ifdef CONFIG_TRACER_SNAPSHOT
7338         trace_create_cpu_file("snapshot", 0644, d_cpu,
7339                                 tr, cpu, &snapshot_fops);
7340
7341         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7342                                 tr, cpu, &snapshot_raw_fops);
7343 #endif
7344 }
7345
7346 #ifdef CONFIG_FTRACE_SELFTEST
7347 /* Let selftest have access to static functions in this file */
7348 #include "trace_selftest.c"
7349 #endif
7350
7351 static ssize_t
7352 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7353                         loff_t *ppos)
7354 {
7355         struct trace_option_dentry *topt = filp->private_data;
7356         char *buf;
7357
7358         if (topt->flags->val & topt->opt->bit)
7359                 buf = "1\n";
7360         else
7361                 buf = "0\n";
7362
7363         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7364 }
7365
7366 static ssize_t
7367 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7368                          loff_t *ppos)
7369 {
7370         struct trace_option_dentry *topt = filp->private_data;
7371         unsigned long val;
7372         int ret;
7373
7374         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7375         if (ret)
7376                 return ret;
7377
7378         if (val != 0 && val != 1)
7379                 return -EINVAL;
7380
7381         if (!!(topt->flags->val & topt->opt->bit) != val) {
7382                 mutex_lock(&trace_types_lock);
7383                 ret = __set_tracer_option(topt->tr, topt->flags,
7384                                           topt->opt, !val);
7385                 mutex_unlock(&trace_types_lock);
7386                 if (ret)
7387                         return ret;
7388         }
7389
7390         *ppos += cnt;
7391
7392         return cnt;
7393 }
7394
7395
7396 static const struct file_operations trace_options_fops = {
7397         .open = tracing_open_generic,
7398         .read = trace_options_read,
7399         .write = trace_options_write,
7400         .llseek = generic_file_llseek,
7401 };
7402
7403 /*
7404  * In order to pass in both the trace_array descriptor as well as the index
7405  * to the flag that the trace option file represents, the trace_array
7406  * has a character array of trace_flags_index[], which holds the index
7407  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7408  * The address of this character array is passed to the flag option file
7409  * read/write callbacks.
7410  *
7411  * In order to extract both the index and the trace_array descriptor,
7412  * get_tr_index() uses the following algorithm.
7413  *
7414  *   idx = *ptr;
7415  *
7416  * Since each element's value equals its own index (remember that
7417  * index[1] == 1), dereferencing the pointer yields the flag's index.
7418  *
7419  * Then, to get the trace_array descriptor, subtracting that index
7420  * from the pointer brings us back to the start of the index array:
7421  *
7422  *   ptr - idx == &index[0]
7423  *
7424  * Then a simple container_of() from that pointer gets us to the
7425  * trace_array descriptor.
7426  */
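/*
 * Worked example (illustrative values): if data points at
 * tr->trace_flags_index[3], then *data == 3, so *pindex is set to 3 and
 * data - 3 == &tr->trace_flags_index[0]; container_of() on that address
 * recovers the enclosing trace_array.
 */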
7427 static void get_tr_index(void *data, struct trace_array **ptr,
7428                          unsigned int *pindex)
7429 {
7430         *pindex = *(unsigned char *)data;
7431
7432         *ptr = container_of(data - *pindex, struct trace_array,
7433                             trace_flags_index);
7434 }
7435
7436 static ssize_t
7437 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7438                         loff_t *ppos)
7439 {
7440         void *tr_index = filp->private_data;
7441         struct trace_array *tr;
7442         unsigned int index;
7443         char *buf;
7444
7445         get_tr_index(tr_index, &tr, &index);
7446
7447         if (tr->trace_flags & (1 << index))
7448                 buf = "1\n";
7449         else
7450                 buf = "0\n";
7451
7452         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7453 }
7454
7455 static ssize_t
7456 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7457                          loff_t *ppos)
7458 {
7459         void *tr_index = filp->private_data;
7460         struct trace_array *tr;
7461         unsigned int index;
7462         unsigned long val;
7463         int ret;
7464
7465         get_tr_index(tr_index, &tr, &index);
7466
7467         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7468         if (ret)
7469                 return ret;
7470
7471         if (val != 0 && val != 1)
7472                 return -EINVAL;
7473
7474         mutex_lock(&event_mutex);
7475         mutex_lock(&trace_types_lock);
7476         ret = set_tracer_flag(tr, 1 << index, val);
7477         mutex_unlock(&trace_types_lock);
7478         mutex_unlock(&event_mutex);
7479
7480         if (ret < 0)
7481                 return ret;
7482
7483         *ppos += cnt;
7484
7485         return cnt;
7486 }
7487
7488 static const struct file_operations trace_options_core_fops = {
7489         .open = tracing_open_generic,
7490         .read = trace_options_core_read,
7491         .write = trace_options_core_write,
7492         .llseek = generic_file_llseek,
7493 };
7494
7495 struct dentry *trace_create_file(const char *name,
7496                                  umode_t mode,
7497                                  struct dentry *parent,
7498                                  void *data,
7499                                  const struct file_operations *fops)
7500 {
7501         struct dentry *ret;
7502
7503         ret = tracefs_create_file(name, mode, parent, data, fops);
7504         if (!ret)
7505                 pr_warn("Could not create tracefs '%s' entry\n", name);
7506
7507         return ret;
7508 }
7509
7510
7511 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7512 {
7513         struct dentry *d_tracer;
7514
7515         if (tr->options)
7516                 return tr->options;
7517
7518         d_tracer = tracing_get_dentry(tr);
7519         if (IS_ERR(d_tracer))
7520                 return NULL;
7521
7522         tr->options = tracefs_create_dir("options", d_tracer);
7523         if (!tr->options) {
7524                 pr_warn("Could not create tracefs directory 'options'\n");
7525                 return NULL;
7526         }
7527
7528         return tr->options;
7529 }
7530
7531 static void
7532 create_trace_option_file(struct trace_array *tr,
7533                          struct trace_option_dentry *topt,
7534                          struct tracer_flags *flags,
7535                          struct tracer_opt *opt)
7536 {
7537         struct dentry *t_options;
7538
7539         t_options = trace_options_init_dentry(tr);
7540         if (!t_options)
7541                 return;
7542
7543         topt->flags = flags;
7544         topt->opt = opt;
7545         topt->tr = tr;
7546
7547         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7548                                     &trace_options_fops);
7549
7550 }
7551
7552 static void
7553 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7554 {
7555         struct trace_option_dentry *topts;
7556         struct trace_options *tr_topts;
7557         struct tracer_flags *flags;
7558         struct tracer_opt *opts;
7559         int cnt;
7560         int i;
7561
7562         if (!tracer)
7563                 return;
7564
7565         flags = tracer->flags;
7566
7567         if (!flags || !flags->opts)
7568                 return;
7569
7570         /*
7571          * If this is an instance, only create flags for tracers
7572          * the instance may have.
7573          */
7574         if (!trace_ok_for_array(tracer, tr))
7575                 return;
7576
7577         for (i = 0; i < tr->nr_topts; i++) {
7578                 /* Make sure there are no duplicate flags. */
7579                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7580                         return;
7581         }
7582
7583         opts = flags->opts;
7584
7585         for (cnt = 0; opts[cnt].name; cnt++)
7586                 ;
7587
7588         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7589         if (!topts)
7590                 return;
7591
7592         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7593                             GFP_KERNEL);
7594         if (!tr_topts) {
7595                 kfree(topts);
7596                 return;
7597         }
7598
7599         tr->topts = tr_topts;
7600         tr->topts[tr->nr_topts].tracer = tracer;
7601         tr->topts[tr->nr_topts].topts = topts;
7602         tr->nr_topts++;
7603
7604         for (cnt = 0; opts[cnt].name; cnt++) {
7605                 create_trace_option_file(tr, &topts[cnt], flags,
7606                                          &opts[cnt]);
7607                 WARN_ONCE(topts[cnt].entry == NULL,
7608                           "Failed to create trace option: %s",
7609                           opts[cnt].name);
7610         }
7611 }
7612
7613 static struct dentry *
7614 create_trace_option_core_file(struct trace_array *tr,
7615                               const char *option, long index)
7616 {
7617         struct dentry *t_options;
7618
7619         t_options = trace_options_init_dentry(tr);
7620         if (!t_options)
7621                 return NULL;
7622
7623         return trace_create_file(option, 0644, t_options,
7624                                  (void *)&tr->trace_flags_index[index],
7625                                  &trace_options_core_fops);
7626 }
7627
7628 static void create_trace_options_dir(struct trace_array *tr)
7629 {
7630         struct dentry *t_options;
7631         bool top_level = tr == &global_trace;
7632         int i;
7633
7634         t_options = trace_options_init_dentry(tr);
7635         if (!t_options)
7636                 return;
7637
7638         for (i = 0; trace_options[i]; i++) {
7639                 if (top_level ||
7640                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7641                         create_trace_option_core_file(tr, trace_options[i], i);
7642         }
7643 }
7644
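/*
 * The "tracing_on" file: reading returns "1" or "0" depending on whether
 * recording into the ring buffer is enabled; writing 0 or 1 switches it
 * off or on and calls the current tracer's ->stop()/->start() callbacks.
 *
 *   # echo 0 > tracing_on      stop recording (buffer contents are kept)
 *   # echo 1 > tracing_on      resume recording
 */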
7645 static ssize_t
7646 rb_simple_read(struct file *filp, char __user *ubuf,
7647                size_t cnt, loff_t *ppos)
7648 {
7649         struct trace_array *tr = filp->private_data;
7650         char buf[64];
7651         int r;
7652
7653         r = tracer_tracing_is_on(tr);
7654         r = sprintf(buf, "%d\n", r);
7655
7656         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7657 }
7658
7659 static ssize_t
7660 rb_simple_write(struct file *filp, const char __user *ubuf,
7661                 size_t cnt, loff_t *ppos)
7662 {
7663         struct trace_array *tr = filp->private_data;
7664         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7665         unsigned long val;
7666         int ret;
7667
7668         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7669         if (ret)
7670                 return ret;
7671
7672         if (buffer) {
7673                 mutex_lock(&trace_types_lock);
7674                 if (!!val == tracer_tracing_is_on(tr)) {
7675                         val = 0; /* do nothing */
7676                 } else if (val) {
7677                         tracer_tracing_on(tr);
7678                         if (tr->current_trace->start)
7679                                 tr->current_trace->start(tr);
7680                 } else {
7681                         tracer_tracing_off(tr);
7682                         if (tr->current_trace->stop)
7683                                 tr->current_trace->stop(tr);
7684                 }
7685                 mutex_unlock(&trace_types_lock);
7686         }
7687
7688         (*ppos)++;
7689
7690         return cnt;
7691 }
7692
7693 static const struct file_operations rb_simple_fops = {
7694         .open           = tracing_open_generic_tr,
7695         .read           = rb_simple_read,
7696         .write          = rb_simple_write,
7697         .release        = tracing_release_generic_tr,
7698         .llseek         = default_llseek,
7699 };
7700
7701 struct dentry *trace_instance_dir;
7702
7703 static void
7704 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7705
7706 static int
7707 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7708 {
7709         enum ring_buffer_flags rb_flags;
7710
7711         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7712
7713         buf->tr = tr;
7714
7715         buf->buffer = ring_buffer_alloc(size, rb_flags);
7716         if (!buf->buffer)
7717                 return -ENOMEM;
7718
7719         buf->data = alloc_percpu(struct trace_array_cpu);
7720         if (!buf->data) {
7721                 ring_buffer_free(buf->buffer);
7722                 buf->buffer = NULL;
7723                 return -ENOMEM;
7724         }
7725
7726         /* Allocate the first page for all buffers */
7727         set_buffer_entries(&tr->trace_buffer,
7728                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7729
7730         return 0;
7731 }
7732
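/*
 * Allocate the ring buffer(s) for a trace array: the main trace_buffer is
 * always sized as requested; with CONFIG_TRACER_MAX_TRACE the max_buffer
 * (snapshot) is allocated too, full sized only when a snapshot was asked
 * for on the kernel command line, otherwise kept minimal until needed.
 */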
7733 static int allocate_trace_buffers(struct trace_array *tr, int size)
7734 {
7735         int ret;
7736
7737         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7738         if (ret)
7739                 return ret;
7740
7741 #ifdef CONFIG_TRACER_MAX_TRACE
7742         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7743                                     allocate_snapshot ? size : 1);
7744         if (WARN_ON(ret)) {
7745                 ring_buffer_free(tr->trace_buffer.buffer);
7746                 tr->trace_buffer.buffer = NULL;
7747                 free_percpu(tr->trace_buffer.data);
7748                 tr->trace_buffer.data = NULL;
7749                 return -ENOMEM;
7750         }
7751         tr->allocated_snapshot = allocate_snapshot;
7752
7753         /*
7754          * Only the top level trace array gets its snapshot allocated
7755          * from the kernel command line.
7756          */
7757         allocate_snapshot = false;
7758 #endif
7759
7760         /*
7761          * Because of the way alloc_percpu() works on x86_64, we need to
7762          * synchronize the pgd of all the tables. Otherwise, a trace event
7763          * taken in the x86_64 page fault handler may itself touch
7764          * alloc_percpu()'d memory and fault again, which the handler
7765          * cannot cope with.
7766          * We also need to audit all other alloc_percpu() and vmalloc()
7767          * calls in tracing, because something might get triggered within
7768          * a page fault trace event!
7769          */
7770         vmalloc_sync_mappings();
7771
7772         return 0;
7773 }
7774
7775 static void free_trace_buffer(struct trace_buffer *buf)
7776 {
7777         if (buf->buffer) {
7778                 ring_buffer_free(buf->buffer);
7779                 buf->buffer = NULL;
7780                 free_percpu(buf->data);
7781                 buf->data = NULL;
7782         }
7783 }
7784
7785 static void free_trace_buffers(struct trace_array *tr)
7786 {
7787         if (!tr)
7788                 return;
7789
7790         free_trace_buffer(&tr->trace_buffer);
7791
7792 #ifdef CONFIG_TRACER_MAX_TRACE
7793         free_trace_buffer(&tr->max_buffer);
7794 #endif
7795 }
7796
7797 static void init_trace_flags_index(struct trace_array *tr)
7798 {
7799         int i;
7800
7801         /* Used by the trace options files */
7802         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7803                 tr->trace_flags_index[i] = i;
7804 }
7805
7806 static void __update_tracer_options(struct trace_array *tr)
7807 {
7808         struct tracer *t;
7809
7810         for (t = trace_types; t; t = t->next)
7811                 add_tracer_options(tr, t);
7812 }
7813
7814 static void update_tracer_options(struct trace_array *tr)
7815 {
7816         mutex_lock(&trace_types_lock);
7817         __update_tracer_options(tr);
7818         mutex_unlock(&trace_types_lock);
7819 }
7820
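/*
 * Called back from tracefs when a directory is created under "instances/";
 * sets up a new trace_array with its own ring buffer, flags and control
 * files. From user space (paths assume tracefs is mounted at
 * /sys/kernel/debug/tracing):
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 */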
7821 static int instance_mkdir(const char *name)
7822 {
7823         struct trace_array *tr;
7824         int ret;
7825
7826         mutex_lock(&event_mutex);
7827         mutex_lock(&trace_types_lock);
7828
7829         ret = -EEXIST;
7830         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7831                 if (tr->name && strcmp(tr->name, name) == 0)
7832                         goto out_unlock;
7833         }
7834
7835         ret = -ENOMEM;
7836         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7837         if (!tr)
7838                 goto out_unlock;
7839
7840         tr->name = kstrdup(name, GFP_KERNEL);
7841         if (!tr->name)
7842                 goto out_free_tr;
7843
7844         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7845                 goto out_free_tr;
7846
7847         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7848
7849         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7850
7851         raw_spin_lock_init(&tr->start_lock);
7852
7853         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7854
7855         tr->current_trace = &nop_trace;
7856
7857         INIT_LIST_HEAD(&tr->systems);
7858         INIT_LIST_HEAD(&tr->events);
7859         INIT_LIST_HEAD(&tr->hist_vars);
7860
7861         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7862                 goto out_free_tr;
7863
7864         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7865         if (!tr->dir)
7866                 goto out_free_tr;
7867
7868         ret = event_trace_add_tracer(tr->dir, tr);
7869         if (ret) {
7870                 tracefs_remove_recursive(tr->dir);
7871                 goto out_free_tr;
7872         }
7873
7874         ftrace_init_trace_array(tr);
7875
7876         init_tracer_tracefs(tr, tr->dir);
7877         init_trace_flags_index(tr);
7878         __update_tracer_options(tr);
7879
7880         list_add(&tr->list, &ftrace_trace_arrays);
7881
7882         mutex_unlock(&trace_types_lock);
7883         mutex_unlock(&event_mutex);
7884
7885         return 0;
7886
7887  out_free_tr:
7888         free_trace_buffers(tr);
7889         free_cpumask_var(tr->tracing_cpumask);
7890         kfree(tr->name);
7891         kfree(tr);
7892
7893  out_unlock:
7894         mutex_unlock(&trace_types_lock);
7895         mutex_unlock(&event_mutex);
7896
7897         return ret;
7898
7899 }
7900
7901 static int instance_rmdir(const char *name)
7902 {
7903         struct trace_array *tr;
7904         int found = 0;
7905         int ret;
7906         int i;
7907
7908         mutex_lock(&event_mutex);
7909         mutex_lock(&trace_types_lock);
7910
7911         ret = -ENODEV;
7912         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7913                 if (tr->name && strcmp(tr->name, name) == 0) {
7914                         found = 1;
7915                         break;
7916                 }
7917         }
7918         if (!found)
7919                 goto out_unlock;
7920
7921         ret = -EBUSY;
7922         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7923                 goto out_unlock;
7924
7925         list_del(&tr->list);
7926
7927         /* Disable all the flags that were enabled coming in */
7928         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7929                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7930                         set_tracer_flag(tr, 1 << i, 0);
7931         }
7932
7933         tracing_set_nop(tr);
7934         clear_ftrace_function_probes(tr);
7935         event_trace_del_tracer(tr);
7936         ftrace_clear_pids(tr);
7937         ftrace_destroy_function_files(tr);
7938         tracefs_remove_recursive(tr->dir);
7939         free_trace_buffers(tr);
7940
7941         for (i = 0; i < tr->nr_topts; i++) {
7942                 kfree(tr->topts[i].topts);
7943         }
7944         kfree(tr->topts);
7945
7946         free_cpumask_var(tr->tracing_cpumask);
7947         kfree(tr->name);
7948         kfree(tr);
7949
7950         ret = 0;
7951
7952  out_unlock:
7953         mutex_unlock(&trace_types_lock);
7954         mutex_unlock(&event_mutex);
7955
7956         return ret;
7957 }
7958
7959 static __init void create_trace_instances(struct dentry *d_tracer)
7960 {
7961         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7962                                                          instance_mkdir,
7963                                                          instance_rmdir);
7964         if (WARN_ON(!trace_instance_dir))
7965                 return;
7966 }
7967
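/*
 * Create the standard set of control files ("trace", "trace_pipe",
 * "tracing_on", "trace_clock", the per-cpu directories, ...) for one
 * trace array, whether the global one or an instance.
 */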
7968 static void
7969 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7970 {
7971         struct trace_event_file *file;
7972         int cpu;
7973
7974         trace_create_file("available_tracers", 0444, d_tracer,
7975                         tr, &show_traces_fops);
7976
7977         trace_create_file("current_tracer", 0644, d_tracer,
7978                         tr, &set_tracer_fops);
7979
7980         trace_create_file("tracing_cpumask", 0644, d_tracer,
7981                           tr, &tracing_cpumask_fops);
7982
7983         trace_create_file("trace_options", 0644, d_tracer,
7984                           tr, &tracing_iter_fops);
7985
7986         trace_create_file("trace", 0644, d_tracer,
7987                           tr, &tracing_fops);
7988
7989         trace_create_file("trace_pipe", 0444, d_tracer,
7990                           tr, &tracing_pipe_fops);
7991
7992         trace_create_file("buffer_size_kb", 0644, d_tracer,
7993                           tr, &tracing_entries_fops);
7994
7995         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7996                           tr, &tracing_total_entries_fops);
7997
7998         trace_create_file("free_buffer", 0200, d_tracer,
7999                           tr, &tracing_free_buffer_fops);
8000
8001         trace_create_file("trace_marker", 0220, d_tracer,
8002                           tr, &tracing_mark_fops);
8003
8004         file = __find_event_file(tr, "ftrace", "print");
8005         if (file && file->dir)
8006                 trace_create_file("trigger", 0644, file->dir, file,
8007                                   &event_trigger_fops);
8008         tr->trace_marker_file = file;
8009
8010         trace_create_file("trace_marker_raw", 0220, d_tracer,
8011                           tr, &tracing_mark_raw_fops);
8012
8013         trace_create_file("trace_clock", 0644, d_tracer, tr,
8014                           &trace_clock_fops);
8015
8016         trace_create_file("tracing_on", 0644, d_tracer,
8017                           tr, &rb_simple_fops);
8018
8019         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8020                           &trace_time_stamp_mode_fops);
8021
8022         create_trace_options_dir(tr);
8023
8024 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8025         trace_create_file("tracing_max_latency", 0644, d_tracer,
8026                         &tr->max_latency, &tracing_max_lat_fops);
8027 #endif
8028
8029         if (ftrace_create_function_files(tr, d_tracer))
8030                 WARN(1, "Could not allocate function filter files");
8031
8032 #ifdef CONFIG_TRACER_SNAPSHOT
8033         trace_create_file("snapshot", 0644, d_tracer,
8034                           tr, &snapshot_fops);
8035 #endif
8036
8037         for_each_tracing_cpu(cpu)
8038                 tracing_init_tracefs_percpu(tr, cpu);
8039
8040         ftrace_init_tracefs(tr, d_tracer);
8041 }
8042
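/*
 * Automount callback for the "tracing" directory in debugfs: submount
 * tracefs there (taking an extra reference on the mount), or return NULL
 * if tracefs is unavailable.
 */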
8043 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8044 {
8045         struct vfsmount *mnt;
8046         struct file_system_type *type;
8047
8048         /*
8049          * To maintain backward compatibility for tools that mount
8050          * debugfs to get to the tracing facility, tracefs is automatically
8051          * mounted to the debugfs/tracing directory.
8052          */
8053         type = get_fs_type("tracefs");
8054         if (!type)
8055                 return NULL;
8056         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8057         put_filesystem(type);
8058         if (IS_ERR(mnt))
8059                 return NULL;
8060         mntget(mnt);
8061
8062         return mnt;
8063 }
8064
8065 /**
8066  * tracing_init_dentry - initialize top level trace array
8067  *
8068  * This is called when creating files or directories in the tracing
8069  * directory. It is called via fs_initcall() from the boot-up code and
8070  * is expected to return the dentry of the top level tracing directory.
8071  */
8072 struct dentry *tracing_init_dentry(void)
8073 {
8074         struct trace_array *tr = &global_trace;
8075
8076         /* The top level trace array uses NULL as parent */
8077         if (tr->dir)
8078                 return NULL;
8079
8080         if (WARN_ON(!tracefs_initialized()) ||
8081                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8082                  WARN_ON(!debugfs_initialized())))
8083                 return ERR_PTR(-ENODEV);
8084
8085         /*
8086          * As there may still be users that expect the tracing
8087          * files to exist in debugfs/tracing, we must automount
8088          * the tracefs file system there, so older tools still
8089          * work with the newer kernel.
8090          */
8091         tr->dir = debugfs_create_automount("tracing", NULL,
8092                                            trace_automount, NULL);
8093         if (!tr->dir) {
8094                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8095                 return ERR_PTR(-ENOMEM);
8096         }
8097
8098         return NULL;
8099 }
8100
8101 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8102 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8103
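/*
 * Register the eval (enum/sizeof) maps that the linker collected for the
 * core kernel between __start_ftrace_eval_maps and __stop_ftrace_eval_maps.
 */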
8104 static void __init trace_eval_init(void)
8105 {
8106         int len;
8107
8108         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8109         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8110 }
8111
8112 #ifdef CONFIG_MODULES
8113 static void trace_module_add_evals(struct module *mod)
8114 {
8115         if (!mod->num_trace_evals)
8116                 return;
8117
8118         /*
8119          * Modules with bad taint do not have events created, so do
8120          * not bother with enums either.
8121          */
8122         if (trace_module_has_bad_taint(mod))
8123                 return;
8124
8125         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8126 }
8127
8128 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
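/*
 * On module unload, unlink and free the eval map entries that were added
 * for this module so they are not referenced after the module is gone.
 */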
8129 static void trace_module_remove_evals(struct module *mod)
8130 {
8131         union trace_eval_map_item *map;
8132         union trace_eval_map_item **last = &trace_eval_maps;
8133
8134         if (!mod->num_trace_evals)
8135                 return;
8136
8137         mutex_lock(&trace_eval_mutex);
8138
8139         map = trace_eval_maps;
8140
8141         while (map) {
8142                 if (map->head.mod == mod)
8143                         break;
8144                 map = trace_eval_jmp_to_tail(map);
8145                 last = &map->tail.next;
8146                 map = map->tail.next;
8147         }
8148         if (!map)
8149                 goto out;
8150
8151         *last = trace_eval_jmp_to_tail(map)->tail.next;
8152         kfree(map);
8153  out:
8154         mutex_unlock(&trace_eval_mutex);
8155 }
8156 #else
8157 static inline void trace_module_remove_evals(struct module *mod) { }
8158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8159
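/* Module notifier: add eval maps as modules load and remove them on unload. */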
8160 static int trace_module_notify(struct notifier_block *self,
8161                                unsigned long val, void *data)
8162 {
8163         struct module *mod = data;
8164
8165         switch (val) {
8166         case MODULE_STATE_COMING:
8167                 trace_module_add_evals(mod);
8168                 break;
8169         case MODULE_STATE_GOING:
8170                 trace_module_remove_evals(mod);
8171                 break;
8172         }
8173
8174         return 0;
8175 }
8176
8177 static struct notifier_block trace_module_nb = {
8178         .notifier_call = trace_module_notify,
8179         .priority = 0,
8180 };
8181 #endif /* CONFIG_MODULES */
8182
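/*
 * fs_initcall: create the top-level tracefs files (README, saved_cmdlines,
 * tracing_thresh, the "instances" directory, ...) and the standard files
 * for the global trace array.
 */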
8183 static __init int tracer_init_tracefs(void)
8184 {
8185         struct dentry *d_tracer;
8186
8187         trace_access_lock_init();
8188
8189         d_tracer = tracing_init_dentry();
8190         if (IS_ERR(d_tracer))
8191                 return 0;
8192
8193         event_trace_init();
8194
8195         init_tracer_tracefs(&global_trace, d_tracer);
8196         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8197
8198         trace_create_file("tracing_thresh", 0644, d_tracer,
8199                         &global_trace, &tracing_thresh_fops);
8200
8201         trace_create_file("README", 0444, d_tracer,
8202                         NULL, &tracing_readme_fops);
8203
8204         trace_create_file("saved_cmdlines", 0444, d_tracer,
8205                         NULL, &tracing_saved_cmdlines_fops);
8206
8207         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8208                           NULL, &tracing_saved_cmdlines_size_fops);
8209
8210         trace_create_file("saved_tgids", 0444, d_tracer,
8211                         NULL, &tracing_saved_tgids_fops);
8212
8213         trace_eval_init();
8214
8215         trace_create_eval_file(d_tracer);
8216
8217 #ifdef CONFIG_MODULES
8218         register_module_notifier(&trace_module_nb);
8219 #endif
8220
8221 #ifdef CONFIG_DYNAMIC_FTRACE
8222         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8223                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8224 #endif
8225
8226         create_trace_instances(d_tracer);
8227
8228         update_tracer_options(&global_trace);
8229
8230         return 0;
8231 }
8232
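/*
 * Panic and die notifiers: when ftrace_dump_on_oops is set (e.g. via the
 * "ftrace_dump_on_oops" command line option or sysctl), dump the ring
 * buffer to the console on an oops or panic.
 */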
8233 static int trace_panic_handler(struct notifier_block *this,
8234                                unsigned long event, void *unused)
8235 {
8236         if (ftrace_dump_on_oops)
8237                 ftrace_dump(ftrace_dump_on_oops);
8238         return NOTIFY_OK;
8239 }
8240
8241 static struct notifier_block trace_panic_notifier = {
8242         .notifier_call  = trace_panic_handler,
8243         .next           = NULL,
8244         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8245 };
8246
8247 static int trace_die_handler(struct notifier_block *self,
8248                              unsigned long val,
8249                              void *data)
8250 {
8251         switch (val) {
8252         case DIE_OOPS:
8253                 if (ftrace_dump_on_oops)
8254                         ftrace_dump(ftrace_dump_on_oops);
8255                 break;
8256         default:
8257                 break;
8258         }
8259         return NOTIFY_OK;
8260 }
8261
8262 static struct notifier_block trace_die_notifier = {
8263         .notifier_call = trace_die_handler,
8264         .priority = 200
8265 };
8266
8267 /*
8268  * printk is set to a max of 1024; we really don't need it that big.
8269  * Nothing should be printing 1000 characters anyway.
8270  */
8271 #define TRACE_MAX_PRINT         1000
8272
8273 /*
8274  * Define here KERN_TRACE so that we have one place to modify
8275  * it if we decide to change what log level the ftrace dump
8276  * should be at.
8277  */
8278 #define KERN_TRACE              KERN_EMERG
8279
8280 void
8281 trace_printk_seq(struct trace_seq *s)
8282 {
8283         /* Probably should print a warning here. */
8284         if (s->seq.len >= TRACE_MAX_PRINT)
8285                 s->seq.len = TRACE_MAX_PRINT;
8286
8287         /*
8288          * More paranoid code. Although the buffer size is set to
8289          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8290          * an extra layer of protection.
8291          */
8292         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8293                 s->seq.len = s->seq.size - 1;
8294
8295         /* Should already be NUL-terminated, but we are paranoid. */
8296         s->buffer[s->seq.len] = 0;
8297
8298         printk(KERN_TRACE "%s", s->buffer);
8299
8300         trace_seq_init(s);
8301 }
8302
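/* Set up an iterator over the global trace buffer, used by ftrace_dump(). */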
8303 void trace_init_global_iter(struct trace_iterator *iter)
8304 {
8305         iter->tr = &global_trace;
8306         iter->trace = iter->tr->current_trace;
8307         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8308         iter->trace_buffer = &global_trace.trace_buffer;
8309
8310         if (iter->trace && iter->trace->open)
8311                 iter->trace->open(iter);
8312
8313         /* Annotate start of buffers if we had overruns */
8314         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8315                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8316
8317         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8318         if (trace_clocks[iter->tr->clock_id].in_ns)
8319                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8320 }
8321
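/*
 * ftrace_dump - dump the global ring buffer to the console
 * @oops_dump_mode: DUMP_ALL dumps every CPU's buffer, DUMP_ORIG only the
 *                  current CPU's, DUMP_NONE skips the dump entirely.
 *
 * Tracing is turned off before dumping; it can be re-enabled afterwards
 * with "echo 1 > tracing_on".
 */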
8322 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8323 {
8324         /* use static because iter can be a bit big for the stack */
8325         static struct trace_iterator iter;
8326         static atomic_t dump_running;
8327         struct trace_array *tr = &global_trace;
8328         unsigned int old_userobj;
8329         unsigned long flags;
8330         int cnt = 0, cpu;
8331
8332         /* Only allow one dump user at a time. */
8333         if (atomic_inc_return(&dump_running) != 1) {
8334                 atomic_dec(&dump_running);
8335                 return;
8336         }
8337
8338         /*
8339          * Always turn off tracing when we dump.
8340          * We don't need to show trace output of what happens
8341          * between multiple crashes.
8342          *
8343          * If the user does a sysrq-z, then they can re-enable
8344          * tracing with echo 1 > tracing_on.
8345          */
8346         tracing_off();
8347
8348         local_irq_save(flags);
8349         printk_nmi_direct_enter();
8350
8351         /* Simulate the iterator */
8352         trace_init_global_iter(&iter);
8353
8354         for_each_tracing_cpu(cpu) {
8355                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8356         }
8357
8358         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8359
8360         /* don't look at user memory in panic mode */
8361         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8362
8363         switch (oops_dump_mode) {
8364         case DUMP_ALL:
8365                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8366                 break;
8367         case DUMP_ORIG:
8368                 iter.cpu_file = raw_smp_processor_id();
8369                 break;
8370         case DUMP_NONE:
8371                 goto out_enable;
8372         default:
8373                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8374                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8375         }
8376
8377         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8378
8379         /* Did function tracer already get disabled? */
8380         if (ftrace_is_dead()) {
8381                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8382                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8383         }
8384
8385         /*
8386          * We need to stop all tracing on all CPUs to read
8387          * the next buffer. This is a bit expensive, but is
8388          * not done often. We fill what we can read,
8389          * and then release the locks again.
8390          */
8391
8392         while (!trace_empty(&iter)) {
8393
8394                 if (!cnt)
8395                         printk(KERN_TRACE "---------------------------------\n");
8396
8397                 cnt++;
8398
8399                 trace_iterator_reset(&iter);
8400                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8401
8402                 if (trace_find_next_entry_inc(&iter) != NULL) {
8403                         int ret;
8404
8405                         ret = print_trace_line(&iter);
8406                         if (ret != TRACE_TYPE_NO_CONSUME)
8407                                 trace_consume(&iter);
8408                 }
8409                 touch_nmi_watchdog();
8410
8411                 trace_printk_seq(&iter.seq);
8412         }
8413
8414         if (!cnt)
8415                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8416         else
8417                 printk(KERN_TRACE "---------------------------------\n");
8418
8419  out_enable:
8420         tr->trace_flags |= old_userobj;
8421
8422         for_each_tracing_cpu(cpu) {
8423                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8424         }
8425         atomic_dec(&dump_running);
8426         printk_nmi_direct_exit();
8427         local_irq_restore(flags);
8428 }
8429 EXPORT_SYMBOL_GPL(ftrace_dump);
8430
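/*
 * Split @buf into an argv[] array and hand it to @createfn, as used by
 * command parsers such as the kprobe/uprobe event files.
 */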
8431 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8432 {
8433         char **argv;
8434         int argc, ret;
8435
8436         argc = 0;
8437         ret = 0;
8438         argv = argv_split(GFP_KERNEL, buf, &argc);
8439         if (!argv)
8440                 return -ENOMEM;
8441
8442         if (argc)
8443                 ret = createfn(argc, argv);
8444
8445         argv_free(argv);
8446
8447         return ret;
8448 }
8449
8450 #define WRITE_BUFSIZE  4096
8451
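/*
 * Parse commands written from user space: copy the input in WRITE_BUFSIZE
 * chunks, split it on newlines, strip '#' comments, and run each line
 * through @createfn (for example, lines written to "kprobe_events" such
 * as "p:myprobe do_sys_open").
 */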
8452 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8453                                 size_t count, loff_t *ppos,
8454                                 int (*createfn)(int, char **))
8455 {
8456         char *kbuf, *buf, *tmp;
8457         int ret = 0;
8458         size_t done = 0;
8459         size_t size;
8460
8461         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8462         if (!kbuf)
8463                 return -ENOMEM;
8464
8465         while (done < count) {
8466                 size = count - done;
8467
8468                 if (size >= WRITE_BUFSIZE)
8469                         size = WRITE_BUFSIZE - 1;
8470
8471                 if (copy_from_user(kbuf, buffer + done, size)) {
8472                         ret = -EFAULT;
8473                         goto out;
8474                 }
8475                 kbuf[size] = '\0';
8476                 buf = kbuf;
8477                 do {
8478                         tmp = strchr(buf, '\n');
8479                         if (tmp) {
8480                                 *tmp = '\0';
8481                                 size = tmp - buf + 1;
8482                         } else {
8483                                 size = strlen(buf);
8484                                 if (done + size < count) {
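                                        /*
                                         * Partial line at the end of this
                                         * chunk: if earlier lines were
                                         * already consumed (buf != kbuf),
                                         * break and re-read it with the
                                         * next chunk; otherwise the single
                                         * line is too long for the buffer.
                                         */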
8485                                         if (buf != kbuf)
8486                                                 break;
8487                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8488                                         pr_warn("Line length is too long: Should be less than %d\n",
8489                                                 WRITE_BUFSIZE - 2);
8490                                         ret = -EINVAL;
8491                                         goto out;
8492                                 }
8493                         }
8494                         done += size;
8495
8496                         /* Remove comments */
8497                         tmp = strchr(buf, '#');
8498
8499                         if (tmp)
8500                                 *tmp = '\0';
8501
8502                         ret = trace_run_command(buf, createfn);
8503                         if (ret)
8504                                 goto out;
8505                         buf += size;
8506
8507                 } while (done < count);
8508         }
8509         ret = done;
8510
8511 out:
8512         kfree(kbuf);
8513
8514         return ret;
8515 }
8516
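/*
 * Allocate the cpumasks and ring buffers for the global trace array,
 * register the nop tracer, and hook up the panic/die notifiers. Called
 * from early_trace_init(), before tracefs exists.
 */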
8517 __init static int tracer_alloc_buffers(void)
8518 {
8519         int ring_buf_size;
8520         int ret = -ENOMEM;
8521
8522         /*
8523          * Make sure we don't accidentally add more trace options
8524          * than we have bits for.
8525          */
8526         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8527
8528         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8529                 goto out;
8530
8531         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8532                 goto out_free_buffer_mask;
8533
8534         /* Only allocate trace_printk buffers if a trace_printk exists */
8535         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8536                 /* Must be called before global_trace.buffer is allocated */
8537                 trace_printk_init_buffers();
8538
8539         /* To save memory, keep the ring buffer size to its minimum */
8540         if (ring_buffer_expanded)
8541                 ring_buf_size = trace_buf_size;
8542         else
8543                 ring_buf_size = 1;
8544
8545         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8546         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8547
8548         raw_spin_lock_init(&global_trace.start_lock);
8549
8550         /*
8551          * The prepare callback allocates some memory for the ring buffer. We
8552          * don't free the buffer if the CPU goes down. If we were to free
8553          * the buffer, then the user would lose any trace that was in the
8554          * buffer. The memory will be removed once the "instance" is removed.
8555          */
8556         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8557                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8558                                       NULL);
8559         if (ret < 0)
8560                 goto out_free_cpumask;
8561         /* Used for event triggers */
8562         ret = -ENOMEM;
8563         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8564         if (!temp_buffer)
8565                 goto out_rm_hp_state;
8566
8567         if (trace_create_savedcmd() < 0)
8568                 goto out_free_temp_buffer;
8569
8570         /* TODO: make the number of buffers hot pluggable with CPUs */
8571         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8572                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8573                 WARN_ON(1);
8574                 goto out_free_savedcmd;
8575         }
8576
8577         if (global_trace.buffer_disabled)
8578                 tracing_off();
8579
8580         if (trace_boot_clock) {
8581                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8582                 if (ret < 0)
8583                         pr_warn("Trace clock %s not defined, going back to default\n",
8584                                 trace_boot_clock);
8585         }
8586
8587         /*
8588          * register_tracer() might reference current_trace, so it
8589          * needs to be set before we register anything. This is
8590          * just a bootstrap of current_trace anyway.
8591          */
8592         global_trace.current_trace = &nop_trace;
8593
8594         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8595
8596         ftrace_init_global_array_ops(&global_trace);
8597
8598         init_trace_flags_index(&global_trace);
8599
8600         register_tracer(&nop_trace);
8601
8602         /* Function tracing may start here (via kernel command line) */
8603         init_function_trace();
8604
8605         /* All seems OK, enable tracing */
8606         tracing_disabled = 0;
8607
8608         atomic_notifier_chain_register(&panic_notifier_list,
8609                                        &trace_panic_notifier);
8610
8611         register_die_notifier(&trace_die_notifier);
8612
8613         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8614
8615         INIT_LIST_HEAD(&global_trace.systems);
8616         INIT_LIST_HEAD(&global_trace.events);
8617         INIT_LIST_HEAD(&global_trace.hist_vars);
8618         list_add(&global_trace.list, &ftrace_trace_arrays);
8619
8620         apply_trace_boot_options();
8621
8622         register_snapshot_cmd();
8623
8624         return 0;
8625
8626 out_free_savedcmd:
8627         free_saved_cmdlines_buffer(savedcmd);
8628 out_free_temp_buffer:
8629         ring_buffer_free(temp_buffer);
8630 out_rm_hp_state:
8631         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8632 out_free_cpumask:
8633         free_cpumask_var(global_trace.tracing_cpumask);
8634 out_free_buffer_mask:
8635         free_cpumask_var(tracing_buffer_mask);
8636 out:
8637         return ret;
8638 }
8639
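/*
 * Called early from start_kernel(): if "tp_printk" was given on the command
 * line, set up the iterator that routes tracepoints to printk, then allocate
 * the trace buffers.
 */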
8640 void __init early_trace_init(void)
8641 {
8642         if (tracepoint_printk) {
8643                 tracepoint_print_iter =
8644                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8645                 if (WARN_ON(!tracepoint_print_iter))
8646                         tracepoint_printk = 0;
8647                 else
8648                         static_key_enable(&tracepoint_printk_key.key);
8649         }
8650         tracer_alloc_buffers();
8651 }
8652
8653 void __init trace_init(void)
8654 {
8655         trace_event_init();
8656 }
8657
8658 __init static int clear_boot_tracer(void)
8659 {
8660         /*
8661          * The default boot-up tracer name is stored in an init section.
8662          * This function is called at late_initcall time. If the boot
8663          * tracer was never found and registered, clear the pointer so a
8664          * later registration does not access the init buffer that is
8665          * about to be freed.
8666          */
8667         if (!default_bootup_tracer)
8668                 return 0;
8669
8670         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8671                default_bootup_tracer);
8672         default_bootup_tracer = NULL;
8673
8674         return 0;
8675 }
8676
8677 fs_initcall(tracer_init_tracefs);
8678 late_initcall_sync(clear_boot_tracer);
8679
8680 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8681 __init static int tracing_set_default_clock(void)
8682 {
8683         /* sched_clock_stable() is determined in late_initcall */
8684         if (!trace_boot_clock && !sched_clock_stable()) {
8685                 printk(KERN_WARNING
8686                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8687                        "If you want to keep using the local clock, then add:\n"
8688                        "  \"trace_clock=local\"\n"
8689                        "on the kernel command line\n");
8690                 tracing_set_clock(&global_trace, "global");
8691         }
8692
8693         return 0;
8694 }
8695 late_initcall_sync(tracing_set_default_clock);
8696 #endif