1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, but concurrent
58  * insertions into the ring buffer (such as trace_printk()) could
59  * occur at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
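/*
 * Added illustration (not part of the original source): a typical way to
 * enable this at run time is via the sysctl file, e.g. from userspace:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(dump all CPUs)
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops	(dump only the oops CPU)
 *
 * or by booting with "ftrace_dump_on_oops" (all CPUs) or
 * "ftrace_dump_on_oops=orig_cpu" (oops CPU only) on the kernel command
 * line, as parsed by set_ftrace_dump_on_oops() below.
 */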
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
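/*
 * Added illustration of the layout described above: for N saved maps the
 * allocated array looks roughly like
 *
 *	[ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *
 * where head.length holds the count of saved maps and tail.next points to
 * the next such array of saved enum_map items (if any).
 */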
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         /* Ignore the "tp_printk_stop_on_boot" param */
232         if (*str == '_')
233                 return 0;
234
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(cycle_t nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
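/*
 * Added note: the +500 before the divide makes this a round-to-nearest
 * conversion, e.g. 1499 ns -> (1999 / 1000) = 1 us, while
 * 1500 ns -> (2000 / 1000) = 2 us.
 */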
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         TRACE_ITER_EVENT_FORK
263
264 /*
265  * The global_trace is the descriptor that holds the tracing
266  * buffers for the live tracing. For each CPU, it contains
267  * a linked list of pages that will store trace entries. The
268  * page descriptor of the pages in memory is used to hold
269  * the linked list by linking the lru item in the page descriptor
270  * to each of the pages in the buffer per CPU.
271  *
272  * For each active CPU there is a data field that holds the
273  * pages for the buffer for that CPU. Each CPU has the same number
274  * of pages allocated for its buffer.
275  */
276 static struct trace_array global_trace = {
277         .trace_flags = TRACE_DEFAULT_FLAGS,
278 };
279
280 LIST_HEAD(ftrace_trace_arrays);
281
282 int trace_array_get(struct trace_array *this_tr)
283 {
284         struct trace_array *tr;
285         int ret = -ENODEV;
286
287         mutex_lock(&trace_types_lock);
288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
289                 if (tr == this_tr) {
290                         tr->ref++;
291                         ret = 0;
292                         break;
293                 }
294         }
295         mutex_unlock(&trace_types_lock);
296
297         return ret;
298 }
299
300 static void __trace_array_put(struct trace_array *this_tr)
301 {
302         WARN_ON(!this_tr->ref);
303         this_tr->ref--;
304 }
305
306 void trace_array_put(struct trace_array *this_tr)
307 {
308         mutex_lock(&trace_types_lock);
309         __trace_array_put(this_tr);
310         mutex_unlock(&trace_types_lock);
311 }
312
313 int call_filter_check_discard(struct trace_event_call *call, void *rec,
314                               struct ring_buffer *buffer,
315                               struct ring_buffer_event *event)
316 {
317         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
318             !filter_match_preds(call->filter, rec)) {
319                 __trace_event_discard_commit(buffer, event);
320                 return 1;
321         }
322
323         return 0;
324 }
325
326 void trace_free_pid_list(struct trace_pid_list *pid_list)
327 {
328         vfree(pid_list->pids);
329         kfree(pid_list);
330 }
331
332 /**
333  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
334  * @filtered_pids: The list of pids to check
335  * @search_pid: The PID to find in @filtered_pids
336  *
337  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
338  */
339 bool
340 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
341 {
342         /*
343          * If pid_max changed after filtered_pids was created, we
344          * by default ignore all pids greater than the previous pid_max.
345          */
346         if (search_pid >= filtered_pids->pid_max)
347                 return false;
348
349         return test_bit(search_pid, filtered_pids->pids);
350 }
351
352 /**
353  * trace_ignore_this_task - should a task be ignored for tracing
354  * @filtered_pids: The list of pids to check
355  * @task: The task that should be ignored if not filtered
356  *
357  * Checks if @task should be traced or not from @filtered_pids.
358  * Returns true if @task should *NOT* be traced.
359  * Returns false if @task should be traced.
360  */
361 bool
362 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
363 {
364         /*
365          * Return false, because if filtered_pids does not exist,
366          * all pids are good to trace.
367          */
368         if (!filtered_pids)
369                 return false;
370
371         return !trace_find_filtered_pid(filtered_pids, task->pid);
372 }
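/*
 * Added usage sketch (not part of the original source): a probe that
 * honours a pid filter would typically bail out early, e.g.
 *
 *	struct trace_pid_list *pid_list = rcu_dereference_sched(some_pid_list);
 *
 *	if (trace_ignore_this_task(pid_list, current))
 *		return;
 *	// ... record the event ...
 *
 * where "some_pid_list" stands in for whatever filtered pid list the
 * caller maintains.
 */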
373
374 /**
375  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
376  * @pid_list: The list to modify
377  * @self: The current task for fork or NULL for exit
378  * @task: The task to add or remove
379  *
380  * When adding a task, if @self is set, the task is only added if @self
381  * is also included in @pid_list. This happens on fork, where tasks should
382  * only be added when the parent is listed. If @self is NULL, then the
383  * @task pid will be removed from the list, which would happen on exit
384  * of a task.
385  */
386 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
387                                   struct task_struct *self,
388                                   struct task_struct *task)
389 {
390         if (!pid_list)
391                 return;
392
393         /* For forks, we only add if the forking task is listed */
394         if (self) {
395                 if (!trace_find_filtered_pid(pid_list, self->pid))
396                         return;
397         }
398
399         /* Sorry, but we don't support pid_max changing after setting */
400         if (task->pid >= pid_list->pid_max)
401                 return;
402
403         /* "self" is set for forks, and NULL for exits */
404         if (self)
405                 set_bit(task->pid, pid_list->pids);
406         else
407                 clear_bit(task->pid, pid_list->pids);
408 }
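/*
 * Added illustration: the expected call patterns are
 *
 *	trace_filter_add_remove_task(pid_list, parent, child);	// on fork
 *	trace_filter_add_remove_task(pid_list, NULL, task);	// on exit
 *
 * so a child is tracked only if its parent already was, and exiting tasks
 * are dropped from the list.
 */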
409
410 /**
411  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
412  * @pid_list: The pid list to show
413  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
414  * @pos: The position of the file
415  *
416  * This is used by the seq_file "next" operation to iterate the pids
417  * listed in a trace_pid_list structure.
418  *
419  * Returns the pid+1 as we want to display pid of zero, but NULL would
420  * stop the iteration.
421  */
422 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
423 {
424         unsigned long pid = (unsigned long)v;
425
426         (*pos)++;
427
428         /* pid is already +1 of the actual previous bit */
429         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
430
431         /* Return pid + 1 to allow zero to be represented */
432         if (pid < pid_list->pid_max)
433                 return (void *)(pid + 1);
434
435         return NULL;
436 }
437
438 /**
439  * trace_pid_start - Used for seq_file to start reading pid lists
440  * @pid_list: The pid list to show
441  * @pos: The position of the file
442  *
443  * This is used by seq_file "start" operation to start the iteration
444  * of listing pids.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
450 {
451         unsigned long pid;
452         loff_t l = 0;
453
454         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
455         if (pid >= pid_list->pid_max)
456                 return NULL;
457
458         /* Return pid + 1 so that zero can be the exit value */
459         for (pid++; pid && l < *pos;
460              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
461                 ;
462         return (void *)pid;
463 }
464
465 /**
466  * trace_pid_show - show the current pid in seq_file processing
467  * @m: The seq_file structure to write into
468  * @v: A void pointer of the pid (+1) value to display
469  *
470  * Can be directly used by seq_file operations to display the current
471  * pid value.
472  */
473 int trace_pid_show(struct seq_file *m, void *v)
474 {
475         unsigned long pid = (unsigned long)v - 1;
476
477         seq_printf(m, "%lu\n", pid);
478         return 0;
479 }
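/*
 * Added illustration (hypothetical wrapper names): trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are meant to back a seq_file
 * iterator, roughly:
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,	// calls trace_pid_start()
 *		.next	= example_pid_next,	// calls trace_pid_next()
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * with the +1 encoding letting pid 0 be iterated without being mistaken
 * for the NULL end-of-sequence marker.
 */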
480
481 /* A parse buffer of 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
482 #define PID_BUF_SIZE            127
483
484 int trace_pid_write(struct trace_pid_list *filtered_pids,
485                     struct trace_pid_list **new_pid_list,
486                     const char __user *ubuf, size_t cnt)
487 {
488         struct trace_pid_list *pid_list;
489         struct trace_parser parser;
490         unsigned long val;
491         int nr_pids = 0;
492         ssize_t read = 0;
493         ssize_t ret = 0;
494         loff_t pos;
495         pid_t pid;
496
497         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
498                 return -ENOMEM;
499
500         /*
501          * Always create a new array when the user writes new pids.
502          * The write is an all-or-nothing operation: if any step
503          * fails, the current list is left unmodified and the new
504          * array is discarded.
505          */
506         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
507         if (!pid_list) {
508                 trace_parser_put(&parser);
509                 return -ENOMEM;
510         }
511
512         pid_list->pid_max = READ_ONCE(pid_max);
513
514         /* Only truncating will shrink pid_max */
515         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
516                 pid_list->pid_max = filtered_pids->pid_max;
517
518         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
519         if (!pid_list->pids) {
520                 trace_parser_put(&parser);
521                 kfree(pid_list);
522                 return -ENOMEM;
523         }
524
525         if (filtered_pids) {
526                 /* copy the current bits to the new max */
527                 for_each_set_bit(pid, filtered_pids->pids,
528                                  filtered_pids->pid_max) {
529                         set_bit(pid, pid_list->pids);
530                         nr_pids++;
531                 }
532         }
533
534         while (cnt > 0) {
535
536                 pos = 0;
537
538                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
539                 if (ret < 0 || !trace_parser_loaded(&parser))
540                         break;
541
542                 read += ret;
543                 ubuf += ret;
544                 cnt -= ret;
545
546                 parser.buffer[parser.idx] = 0;
547
548                 ret = -EINVAL;
549                 if (kstrtoul(parser.buffer, 0, &val))
550                         break;
551                 if (val >= pid_list->pid_max)
552                         break;
553
554                 pid = (pid_t)val;
555
556                 set_bit(pid, pid_list->pids);
557                 nr_pids++;
558
559                 trace_parser_clear(&parser);
560                 ret = 0;
561         }
562         trace_parser_put(&parser);
563
564         if (ret < 0) {
565                 trace_free_pid_list(pid_list);
566                 return ret;
567         }
568
569         if (!nr_pids) {
570                 /* Cleared the list of pids */
571                 trace_free_pid_list(pid_list);
572                 read = ret;
573                 pid_list = NULL;
574         }
575
576         *new_pid_list = pid_list;
577
578         return read;
579 }
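/*
 * Added note: the input is whitespace-separated pid numbers, so a write
 * such as "1 23 456\n" (e.g. echoed into one of the *_pid tracefs files)
 * sets three bits in the new list. An empty or whitespace-only write
 * clears the list (nr_pids == 0 above), and any parse error leaves the
 * previous list untouched.
 */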
580
581 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
582 {
583         u64 ts;
584
585         /* Early boot up does not have a buffer yet */
586         if (!buf->buffer)
587                 return trace_clock_local();
588
589         ts = ring_buffer_time_stamp(buf->buffer, cpu);
590         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
591
592         return ts;
593 }
594
595 cycle_t ftrace_now(int cpu)
596 {
597         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
598 }
599
600 /**
601  * tracing_is_enabled - Show whether global_trace is enabled
602  *
603  * Shows if the global trace has been enabled or not. It uses the
604  * mirror flag "buffer_disabled" to be used in fast paths such as for
605  * the irqsoff tracer. But it may be inaccurate due to races. If you
606  * need to know the accurate state, use tracing_is_on() which is a little
607  * slower, but accurate.
608  */
609 int tracing_is_enabled(void)
610 {
611         /*
612          * For quick access (irqsoff uses this in fast path), just
613          * return the mirror variable of the state of the ring buffer.
614          * It's a little racy, but we don't really care.
615          */
616         smp_rmb();
617         return !global_trace.buffer_disabled;
618 }
619
620 /*
621  * trace_buf_size is the size in bytes that is allocated
622  * for a buffer. Note, the number of bytes is always rounded
623  * to page size.
624  *
625  * This number is purposely set to a low default of 16384 entries
626  * (16384 * 88 bytes, roughly 1.4 MB per CPU), so that a dump on
627  * oops does not force you to wait for an overwhelming amount of
628  * output. It is configurable at both boot time and run time.
629  */
630 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
631
632 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
633
634 /* trace_types holds a link list of available tracers. */
635 static struct tracer            *trace_types __read_mostly;
636
637 /*
638  * trace_types_lock is used to protect the trace_types list.
639  */
640 DEFINE_MUTEX(trace_types_lock);
641
642 /*
643  * Serialize access to the ring buffer.
644  *
645  * The ring buffer serializes readers, but that is only low-level protection.
646  * The validity of the events (returned by ring_buffer_peek() etc.)
647  * is not protected by the ring buffer.
648  *
649  * The content of events may become garbage if we allow other processes to
650  * consume these events concurrently:
651  *   A) the page of the consumed events may become a normal page
652  *      (not a reader page) in the ring buffer, and this page will be
653  *      rewritten by the event producer.
654  *   B) the page of the consumed events may become a page for splice_read,
655  *      and this page will be returned to the system.
656  *
657  * These primitives allow multiple processes to access different per-CPU
658  * ring buffers concurrently.
659  *
660  * These primitives don't distinguish read-only and read-consume access.
661  * Multiple read-only accesses are also serialized.
662  */
663
664 #ifdef CONFIG_SMP
665 static DECLARE_RWSEM(all_cpu_access_lock);
666 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
667
668 static inline void trace_access_lock(int cpu)
669 {
670         if (cpu == RING_BUFFER_ALL_CPUS) {
671                 /* gain it for accessing the whole ring buffer. */
672                 down_write(&all_cpu_access_lock);
673         } else {
674                 /* gain it for accessing a cpu ring buffer. */
675
676                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
677                 down_read(&all_cpu_access_lock);
678
679                 /* Secondly block other access to this @cpu ring buffer. */
680                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
681         }
682 }
683
684 static inline void trace_access_unlock(int cpu)
685 {
686         if (cpu == RING_BUFFER_ALL_CPUS) {
687                 up_write(&all_cpu_access_lock);
688         } else {
689                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
690                 up_read(&all_cpu_access_lock);
691         }
692 }
693
694 static inline void trace_access_lock_init(void)
695 {
696         int cpu;
697
698         for_each_possible_cpu(cpu)
699                 mutex_init(&per_cpu(cpu_access_lock, cpu));
700 }
701
702 #else
703
704 static DEFINE_MUTEX(access_lock);
705
706 static inline void trace_access_lock(int cpu)
707 {
708         (void)cpu;
709         mutex_lock(&access_lock);
710 }
711
712 static inline void trace_access_unlock(int cpu)
713 {
714         (void)cpu;
715         mutex_unlock(&access_lock);
716 }
717
718 static inline void trace_access_lock_init(void)
719 {
720 }
721
722 #endif
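/*
 * Added usage sketch: readers of a single cpu buffer and readers of all
 * buffers nest these helpers the same way, e.g.
 *
 *	trace_access_lock(iter->cpu_file);	// a cpu id or RING_BUFFER_ALL_CPUS
 *	// ... consume or peek at events ...
 *	trace_access_unlock(iter->cpu_file);
 *
 * which lets per-cpu readers run concurrently while an ALL_CPUS reader
 * excludes everyone via the rwsem write side.
 */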
723
724 #ifdef CONFIG_STACKTRACE
725 static void __ftrace_trace_stack(struct ring_buffer *buffer,
726                                  unsigned long flags,
727                                  int skip, int pc, struct pt_regs *regs);
728 static inline void ftrace_trace_stack(struct trace_array *tr,
729                                       struct ring_buffer *buffer,
730                                       unsigned long flags,
731                                       int skip, int pc, struct pt_regs *regs);
732
733 #else
734 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
735                                         unsigned long flags,
736                                         int skip, int pc, struct pt_regs *regs)
737 {
738 }
739 static inline void ftrace_trace_stack(struct trace_array *tr,
740                                       struct ring_buffer *buffer,
741                                       unsigned long flags,
742                                       int skip, int pc, struct pt_regs *regs)
743 {
744 }
745
746 #endif
747
748 static void tracer_tracing_on(struct trace_array *tr)
749 {
750         if (tr->trace_buffer.buffer)
751                 ring_buffer_record_on(tr->trace_buffer.buffer);
752         /*
753          * This flag is looked at when buffers haven't been allocated
754          * yet, or by some tracers (like irqsoff), that just want to
755          * know if the ring buffer has been disabled, but it can handle
756          * races of where it gets disabled but we still do a record.
757          * As the check is in the fast path of the tracers, it is more
758          * important to be fast than accurate.
759          */
760         tr->buffer_disabled = 0;
761         /* Make the flag seen by readers */
762         smp_wmb();
763 }
764
765 /**
766  * tracing_on - enable tracing buffers
767  *
768  * This function enables tracing buffers that may have been
769  * disabled with tracing_off.
770  */
771 void tracing_on(void)
772 {
773         tracer_tracing_on(&global_trace);
774 }
775 EXPORT_SYMBOL_GPL(tracing_on);
776
777 /**
778  * __trace_puts - write a constant string into the trace buffer.
779  * @ip:    The address of the caller
780  * @str:   The constant string to write
781  * @size:  The size of the string.
782  */
783 int __trace_puts(unsigned long ip, const char *str, int size)
784 {
785         struct ring_buffer_event *event;
786         struct ring_buffer *buffer;
787         struct print_entry *entry;
788         unsigned long irq_flags;
789         int alloc;
790         int pc;
791
792         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
793                 return 0;
794
795         pc = preempt_count();
796
797         if (unlikely(tracing_selftest_running || tracing_disabled))
798                 return 0;
799
800         alloc = sizeof(*entry) + size + 2; /* possible \n added */
801
802         local_save_flags(irq_flags);
803         buffer = global_trace.trace_buffer.buffer;
804         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
805                                           irq_flags, pc);
806         if (!event)
807                 return 0;
808
809         entry = ring_buffer_event_data(event);
810         entry->ip = ip;
811
812         memcpy(&entry->buf, str, size);
813
814         /* Add a newline if necessary */
815         if (entry->buf[size - 1] != '\n') {
816                 entry->buf[size] = '\n';
817                 entry->buf[size + 1] = '\0';
818         } else
819                 entry->buf[size] = '\0';
820
821         __buffer_unlock_commit(buffer, event);
822         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
823
824         return size;
825 }
826 EXPORT_SYMBOL_GPL(__trace_puts);
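/*
 * Added note: callers normally reach this through the trace_puts() macro
 * rather than calling __trace_puts() directly, e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * which picks __trace_bputs() for compile-time string constants and falls
 * back to __trace_puts() otherwise (see trace_puts() in include/linux/kernel.h).
 */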
827
828 /**
829  * __trace_bputs - write the pointer to a constant string into trace buffer
830  * @ip:    The address of the caller
831  * @str:   The constant string to write to the buffer
832  */
833 int __trace_bputs(unsigned long ip, const char *str)
834 {
835         struct ring_buffer_event *event;
836         struct ring_buffer *buffer;
837         struct bputs_entry *entry;
838         unsigned long irq_flags;
839         int size = sizeof(struct bputs_entry);
840         int pc;
841
842         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843                 return 0;
844
845         pc = preempt_count();
846
847         if (unlikely(tracing_selftest_running || tracing_disabled))
848                 return 0;
849
850         local_save_flags(irq_flags);
851         buffer = global_trace.trace_buffer.buffer;
852         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
853                                           irq_flags, pc);
854         if (!event)
855                 return 0;
856
857         entry = ring_buffer_event_data(event);
858         entry->ip                       = ip;
859         entry->str                      = str;
860
861         __buffer_unlock_commit(buffer, event);
862         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
863
864         return 1;
865 }
866 EXPORT_SYMBOL_GPL(__trace_bputs);
867
868 #ifdef CONFIG_TRACER_SNAPSHOT
869 /**
870  * tracing_snapshot - take a snapshot of the current buffer.
871  *
872  * This causes a swap between the snapshot buffer and the current live
873  * tracing buffer. You can use this to take snapshots of the live
874  * trace when some condition is triggered, but continue to trace.
875  *
876  * Note, make sure to allocate the snapshot either with
877  * tracing_snapshot_alloc(), or by doing it manually
878  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
879  *
880  * If the snapshot buffer is not allocated, this will stop tracing,
881  * basically making a permanent snapshot.
882  */
883 void tracing_snapshot(void)
884 {
885         struct trace_array *tr = &global_trace;
886         struct tracer *tracer = tr->current_trace;
887         unsigned long flags;
888
889         if (in_nmi()) {
890                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
891                 internal_trace_puts("*** snapshot is being ignored        ***\n");
892                 return;
893         }
894
895         if (!tr->allocated_snapshot) {
896                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
897                 internal_trace_puts("*** stopping trace here!   ***\n");
898                 tracing_off();
899                 return;
900         }
901
902         /* Note, snapshot can not be used when the tracer uses it */
903         if (tracer->use_max_tr) {
904                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
905                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
906                 return;
907         }
908
909         local_irq_save(flags);
910         update_max_tr(tr, current, smp_processor_id());
911         local_irq_restore(flags);
912 }
913 EXPORT_SYMBOL_GPL(tracing_snapshot);
914
915 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
916                                         struct trace_buffer *size_buf, int cpu_id);
917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
918
919 static int alloc_snapshot(struct trace_array *tr)
920 {
921         int ret;
922
923         if (!tr->allocated_snapshot) {
924
925                 /* allocate spare buffer */
926                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
927                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
928                 if (ret < 0)
929                         return ret;
930
931                 tr->allocated_snapshot = true;
932         }
933
934         return 0;
935 }
936
937 static void free_snapshot(struct trace_array *tr)
938 {
939         /*
940          * We don't free the ring buffer; instead, we resize it because
941          * the max_tr ring buffer has some state (e.g. ring->clock) and
942          * we want to preserve it.
943          */
944         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
945         set_buffer_entries(&tr->max_buffer, 1);
946         tracing_reset_online_cpus(&tr->max_buffer);
947         tr->allocated_snapshot = false;
948 }
949
950 /**
951  * tracing_alloc_snapshot - allocate snapshot buffer.
952  *
953  * This only allocates the snapshot buffer if it isn't already
954  * allocated - it doesn't also take a snapshot.
955  *
956  * This is meant to be used in cases where the snapshot buffer needs
957  * to be set up for events that can't sleep but need to be able to
958  * trigger a snapshot.
959  */
960 int tracing_alloc_snapshot(void)
961 {
962         struct trace_array *tr = &global_trace;
963         int ret;
964
965         ret = alloc_snapshot(tr);
966         WARN_ON(ret < 0);
967
968         return ret;
969 }
970 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
971
972 /**
973  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
974  *
975  * This is similar to tracing_snapshot(), but it will allocate the
976  * snapshot buffer if it isn't already allocated. Use this only
977  * where it is safe to sleep, as the allocation may sleep.
978  *
979  * This causes a swap between the snapshot buffer and the current live
980  * tracing buffer. You can use this to take snapshots of the live
981  * trace when some condition is triggered, but continue to trace.
982  */
983 void tracing_snapshot_alloc(void)
984 {
985         int ret;
986
987         ret = tracing_alloc_snapshot();
988         if (ret < 0)
989                 return;
990
991         tracing_snapshot();
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
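/*
 * Added usage sketch: a typical pattern is to allocate the spare buffer
 * once from a context that may sleep, and then trigger snapshots wherever
 * the interesting condition is detected, e.g.
 *
 *	// during setup
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	// later, in the hot path (may be atomic, but not NMI context)
 *	if (hit_the_condition)
 *		tracing_snapshot();
 *
 * "hit_the_condition" is a placeholder for the caller's own trigger.
 */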
994 #else
995 void tracing_snapshot(void)
996 {
997         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000 int tracing_alloc_snapshot(void)
1001 {
1002         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1003         return -ENODEV;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1006 void tracing_snapshot_alloc(void)
1007 {
1008         /* Give warning */
1009         tracing_snapshot();
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1012 #endif /* CONFIG_TRACER_SNAPSHOT */
1013
1014 static void tracer_tracing_off(struct trace_array *tr)
1015 {
1016         if (tr->trace_buffer.buffer)
1017                 ring_buffer_record_off(tr->trace_buffer.buffer);
1018         /*
1019          * This flag is looked at when buffers haven't been allocated
1020          * yet, or by some tracers (like irqsoff), that just want to
1021          * know if the ring buffer has been disabled, but it can handle
1022          * races of where it gets disabled but we still do a record.
1023          * As the check is in the fast path of the tracers, it is more
1024          * important to be fast than accurate.
1025          */
1026         tr->buffer_disabled = 1;
1027         /* Make the flag seen by readers */
1028         smp_wmb();
1029 }
1030
1031 /**
1032  * tracing_off - turn off tracing buffers
1033  *
1034  * This function stops the tracing buffers from recording data.
1035  * It does not disable any overhead the tracers themselves may
1036  * be causing. This function simply causes all recording to
1037  * the ring buffers to fail.
1038  */
1039 void tracing_off(void)
1040 {
1041         tracer_tracing_off(&global_trace);
1042 }
1043 EXPORT_SYMBOL_GPL(tracing_off);
1044
1045 void disable_trace_on_warning(void)
1046 {
1047         if (__disable_trace_on_warning)
1048                 tracing_off();
1049 }
1050
1051 /**
1052  * tracer_tracing_is_on - show the real state of the ring buffer
1053  * @tr : the trace array whose ring buffer state is queried
1054  *
1055  * Shows the real state of the ring buffer: whether it is enabled or not.
1056  */
1057 int tracer_tracing_is_on(struct trace_array *tr)
1058 {
1059         if (tr->trace_buffer.buffer)
1060                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1061         return !tr->buffer_disabled;
1062 }
1063
1064 /**
1065  * tracing_is_on - show state of ring buffers enabled
1066  */
1067 int tracing_is_on(void)
1068 {
1069         return tracer_tracing_is_on(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_is_on);
1072
1073 static int __init set_buf_size(char *str)
1074 {
1075         unsigned long buf_size;
1076
1077         if (!str)
1078                 return 0;
1079         buf_size = memparse(str, &str);
1080         /*
1081          * nr_entries can not be zero and the startup
1082          * tests require some buffer space. Therefore
1083          * ensure we have at least 4096 bytes of buffer.
1084          */
1085         trace_buf_size = max(4096UL, buf_size);
1086         return 1;
1087 }
1088 __setup("trace_buf_size=", set_buf_size);
1089
1090 static int __init set_tracing_thresh(char *str)
1091 {
1092         unsigned long threshold;
1093         int ret;
1094
1095         if (!str)
1096                 return 0;
1097         ret = kstrtoul(str, 0, &threshold);
1098         if (ret < 0)
1099                 return 0;
1100         tracing_thresh = threshold * 1000;
1101         return 1;
1102 }
1103 __setup("tracing_thresh=", set_tracing_thresh);
1104
1105 unsigned long nsecs_to_usecs(unsigned long nsecs)
1106 {
1107         return nsecs / 1000;
1108 }
1109
1110 /*
1111  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1112  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1113  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1114  * of strings in the order that the enums were defined.
1115  */
1116 #undef C
1117 #define C(a, b) b
1118
1119 /* These must match the bit positions in trace_iterator_flags */
1120 static const char *trace_options[] = {
1121         TRACE_FLAGS
1122         NULL
1123 };
1124
1125 static struct {
1126         u64 (*func)(void);
1127         const char *name;
1128         int in_ns;              /* is this clock in nanoseconds? */
1129 } trace_clocks[] = {
1130         { trace_clock_local,            "local",        1 },
1131         { trace_clock_global,           "global",       1 },
1132         { trace_clock_counter,          "counter",      0 },
1133         { trace_clock_jiffies,          "uptime",       0 },
1134         { trace_clock,                  "perf",         1 },
1135         { ktime_get_mono_fast_ns,       "mono",         1 },
1136         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1137         ARCH_TRACE_CLOCKS
1138 };
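/*
 * Added note: these entries back the tracefs "trace_clock" file; writing
 * one of the names listed above (e.g. "global" instead of the default
 * "local") selects the corresponding timestamp function for the buffers.
 */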
1139
1140 /*
1141  * trace_parser_get_init - gets the buffer for trace parser
1142  */
1143 int trace_parser_get_init(struct trace_parser *parser, int size)
1144 {
1145         memset(parser, 0, sizeof(*parser));
1146
1147         parser->buffer = kmalloc(size, GFP_KERNEL);
1148         if (!parser->buffer)
1149                 return 1;
1150
1151         parser->size = size;
1152         return 0;
1153 }
1154
1155 /*
1156  * trace_parser_put - frees the buffer for trace parser
1157  */
1158 void trace_parser_put(struct trace_parser *parser)
1159 {
1160         kfree(parser->buffer);
1161 }
1162
1163 /*
1164  * trace_get_user - reads the user input string separated by space
1165  * (matched by isspace(ch))
1166  *
1167  * For each string found the 'struct trace_parser' is updated,
1168  * and the function returns.
1169  *
1170  * Returns number of bytes read.
1171  *
1172  * See kernel/trace/trace.h for 'struct trace_parser' details.
1173  */
1174 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1175         size_t cnt, loff_t *ppos)
1176 {
1177         char ch;
1178         size_t read = 0;
1179         ssize_t ret;
1180
1181         if (!*ppos)
1182                 trace_parser_clear(parser);
1183
1184         ret = get_user(ch, ubuf++);
1185         if (ret)
1186                 goto out;
1187
1188         read++;
1189         cnt--;
1190
1191         /*
1192          * If the parser did not finish with the last write, continue
1193          * reading the user input without skipping spaces.
1194          */
1195         if (!parser->cont) {
1196                 /* skip white space */
1197                 while (cnt && isspace(ch)) {
1198                         ret = get_user(ch, ubuf++);
1199                         if (ret)
1200                                 goto out;
1201                         read++;
1202                         cnt--;
1203                 }
1204
1205                 /* only spaces were written */
1206                 if (isspace(ch)) {
1207                         *ppos += read;
1208                         ret = read;
1209                         goto out;
1210                 }
1211
1212                 parser->idx = 0;
1213         }
1214
1215         /* read the non-space input */
1216         while (cnt && !isspace(ch)) {
1217                 if (parser->idx < parser->size - 1)
1218                         parser->buffer[parser->idx++] = ch;
1219                 else {
1220                         ret = -EINVAL;
1221                         goto out;
1222                 }
1223                 ret = get_user(ch, ubuf++);
1224                 if (ret)
1225                         goto out;
1226                 read++;
1227                 cnt--;
1228         }
1229
1230         /* We either got finished input or we have to wait for another call. */
1231         if (isspace(ch)) {
1232                 parser->buffer[parser->idx] = 0;
1233                 parser->cont = false;
1234         } else if (parser->idx < parser->size - 1) {
1235                 parser->cont = true;
1236                 parser->buffer[parser->idx++] = ch;
1237         } else {
1238                 ret = -EINVAL;
1239                 goto out;
1240         }
1241
1242         *ppos += read;
1243         ret = read;
1244
1245 out:
1246         return ret;
1247 }
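/*
 * Added example: a user write of "foo bar\n" is consumed one token per
 * call. The first call returns with parser->buffer == "foo" and
 * parser->cont == false; the second returns "bar". A write that ends
 * mid-token (no trailing space yet) sets parser->cont so the next call
 * keeps appending to the same token instead of skipping spaces first.
 */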
1248
1249 /* TODO add a seq_buf_to_buffer() */
1250 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1251 {
1252         int len;
1253
1254         if (trace_seq_used(s) <= s->seq.readpos)
1255                 return -EBUSY;
1256
1257         len = trace_seq_used(s) - s->seq.readpos;
1258         if (cnt > len)
1259                 cnt = len;
1260         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1261
1262         s->seq.readpos += cnt;
1263         return cnt;
1264 }
1265
1266 unsigned long __read_mostly     tracing_thresh;
1267
1268 #ifdef CONFIG_TRACER_MAX_TRACE
1269 /*
1270  * Copy the new maximum trace into the separate maximum-trace
1271  * structure. (this way the maximum trace is permanently saved,
1272  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1273  */
1274 static void
1275 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1276 {
1277         struct trace_buffer *trace_buf = &tr->trace_buffer;
1278         struct trace_buffer *max_buf = &tr->max_buffer;
1279         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1280         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1281
1282         max_buf->cpu = cpu;
1283         max_buf->time_start = data->preempt_timestamp;
1284
1285         max_data->saved_latency = tr->max_latency;
1286         max_data->critical_start = data->critical_start;
1287         max_data->critical_end = data->critical_end;
1288
1289         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1290         max_data->pid = tsk->pid;
1291         /*
1292          * If tsk == current, then use current_uid(), as that does not use
1293          * RCU. The irq tracer can be called out of RCU scope.
1294          */
1295         if (tsk == current)
1296                 max_data->uid = current_uid();
1297         else
1298                 max_data->uid = task_uid(tsk);
1299
1300         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1301         max_data->policy = tsk->policy;
1302         max_data->rt_priority = tsk->rt_priority;
1303
1304         /* record this tasks comm */
1305         tracing_record_cmdline(tsk);
1306 }
1307
1308 /**
1309  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1310  * @tr: tracer
1311  * @tsk: the task with the latency
1312  * @cpu: The cpu that initiated the trace.
1313  *
1314  * Flip the buffers between the @tr and the max_tr and record information
1315  * about which task was the cause of this latency.
1316  */
1317 void
1318 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct ring_buffer *buf;
1321
1322         if (tr->stop_count)
1323                 return;
1324
1325         WARN_ON_ONCE(!irqs_disabled());
1326
1327         if (!tr->allocated_snapshot) {
1328                 /* Only the nop tracer should hit this when disabling */
1329                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1330                 return;
1331         }
1332
1333         arch_spin_lock(&tr->max_lock);
1334
1335         /* Inherit the recordable setting from trace_buffer */
1336         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1337                 ring_buffer_record_on(tr->max_buffer.buffer);
1338         else
1339                 ring_buffer_record_off(tr->max_buffer.buffer);
1340
1341         buf = tr->trace_buffer.buffer;
1342         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1343         tr->max_buffer.buffer = buf;
1344
1345         __update_max_tr(tr, tsk, cpu);
1346         arch_spin_unlock(&tr->max_lock);
1347 }
1348
1349 /**
1350  * update_max_tr_single - only copy one trace over, and reset the rest
1351  * @tr: tracer
1352  * @tsk: task with the latency
1353  * @cpu: the cpu of the buffer to copy.
1354  *
1355  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1356  */
1357 void
1358 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1359 {
1360         int ret;
1361
1362         if (tr->stop_count)
1363                 return;
1364
1365         WARN_ON_ONCE(!irqs_disabled());
1366         if (!tr->allocated_snapshot) {
1367                 /* Only the nop tracer should hit this when disabling */
1368                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1369                 return;
1370         }
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1375
1376         if (ret == -EBUSY) {
1377                 /*
1378                  * We failed to swap the buffer due to a commit taking
1379                  * place on this CPU. We fail to record, but we reset
1380                  * the max trace buffer (no one writes directly to it)
1381                  * and flag that it failed.
1382                  */
1383                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1384                         "Failed to swap buffers due to commit in progress\n");
1385         }
1386
1387         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1388
1389         __update_max_tr(tr, tsk, cpu);
1390         arch_spin_unlock(&tr->max_lock);
1391 }
1392 #endif /* CONFIG_TRACER_MAX_TRACE */
1393
1394 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1395 {
1396         /* Iterators are static, they should be filled or empty */
1397         if (trace_buffer_iter(iter, iter->cpu_file))
1398                 return 0;
1399
1400         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1401                                 full);
1402 }
1403
1404 #ifdef CONFIG_FTRACE_STARTUP_TEST
1405 static int run_tracer_selftest(struct tracer *type)
1406 {
1407         struct trace_array *tr = &global_trace;
1408         struct tracer *saved_tracer = tr->current_trace;
1409         int ret;
1410
1411         if (!type->selftest || tracing_selftest_disabled)
1412                 return 0;
1413
1414         /*
1415          * Run a selftest on this tracer.
1416          * Here we reset the trace buffer, and set the current
1417          * tracer to be this tracer. The tracer can then run some
1418          * internal tracing to verify that everything is in order.
1419          * If we fail, we do not register this tracer.
1420          */
1421         tracing_reset_online_cpus(&tr->trace_buffer);
1422
1423         tr->current_trace = type;
1424
1425 #ifdef CONFIG_TRACER_MAX_TRACE
1426         if (type->use_max_tr) {
1427                 /* If we expanded the buffers, make sure the max is expanded too */
1428                 if (ring_buffer_expanded)
1429                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1430                                            RING_BUFFER_ALL_CPUS);
1431                 tr->allocated_snapshot = true;
1432         }
1433 #endif
1434
1435         /* the test is responsible for initializing and enabling */
1436         pr_info("Testing tracer %s: ", type->name);
1437         ret = type->selftest(type, tr);
1438         /* the test is responsible for resetting too */
1439         tr->current_trace = saved_tracer;
1440         if (ret) {
1441                 printk(KERN_CONT "FAILED!\n");
1442                 /* Add the warning after printing 'FAILED' */
1443                 WARN_ON(1);
1444                 return -1;
1445         }
1446         /* Only reset on passing, to avoid touching corrupted buffers */
1447         tracing_reset_online_cpus(&tr->trace_buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         if (type->use_max_tr) {
1451                 tr->allocated_snapshot = false;
1452
1453                 /* Shrink the max buffer again */
1454                 if (ring_buffer_expanded)
1455                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1456                                            RING_BUFFER_ALL_CPUS);
1457         }
1458 #endif
1459
1460         printk(KERN_CONT "PASSED\n");
1461         return 0;
1462 }
1463 #else
1464 static inline int run_tracer_selftest(struct tracer *type)
1465 {
1466         return 0;
1467 }
1468 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1469
1470 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1471
1472 static void __init apply_trace_boot_options(void);
1473
1474 /**
1475  * register_tracer - register a tracer with the ftrace system.
1476  * @type: the plugin for the tracer
1477  *
1478  * Register a new plugin tracer.
1479  */
1480 int __init register_tracer(struct tracer *type)
1481 {
1482         struct tracer *t;
1483         int ret = 0;
1484
1485         if (!type->name) {
1486                 pr_info("Tracer must have a name\n");
1487                 return -1;
1488         }
1489
1490         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1491                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1492                 return -1;
1493         }
1494
1495         mutex_lock(&trace_types_lock);
1496
1497         tracing_selftest_running = true;
1498
1499         for (t = trace_types; t; t = t->next) {
1500                 if (strcmp(type->name, t->name) == 0) {
1501                         /* already found */
1502                         pr_info("Tracer %s already registered\n",
1503                                 type->name);
1504                         ret = -1;
1505                         goto out;
1506                 }
1507         }
1508
1509         if (!type->set_flag)
1510                 type->set_flag = &dummy_set_flag;
1511         if (!type->flags) {
1512                 /* allocate a dummy tracer_flags */
1513                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1514                 if (!type->flags) {
1515                         ret = -ENOMEM;
1516                         goto out;
1517                 }
1518                 type->flags->val = 0;
1519                 type->flags->opts = dummy_tracer_opt;
1520         } else
1521                 if (!type->flags->opts)
1522                         type->flags->opts = dummy_tracer_opt;
1523
1524         /* store the tracer for __set_tracer_option */
1525         type->flags->trace = type;
1526
1527         ret = run_tracer_selftest(type);
1528         if (ret < 0)
1529                 goto out;
1530
1531         type->next = trace_types;
1532         trace_types = type;
1533         add_tracer_options(&global_trace, type);
1534
1535  out:
1536         tracing_selftest_running = false;
1537         mutex_unlock(&trace_types_lock);
1538
1539         if (ret || !default_bootup_tracer)
1540                 goto out_unlock;
1541
1542         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1543                 goto out_unlock;
1544
1545         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1546         /* Do we want this tracer to start on bootup? */
1547         tracing_set_tracer(&global_trace, type->name);
1548         default_bootup_tracer = NULL;
1549
1550         apply_trace_boot_options();
1551
1552         /* disable other selftests, since running this tracer will break them. */
1553         tracing_selftest_disabled = true;
1554 #ifdef CONFIG_FTRACE_STARTUP_TEST
1555         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1556                type->name);
1557 #endif
1558
1559  out_unlock:
1560         return ret;
1561 }
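/*
 * Added illustration (hypothetical tracer, not from this file): a minimal
 * plugin only needs a name and init/reset callbacks, registered at boot:
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */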
1562
1563 void tracing_reset(struct trace_buffer *buf, int cpu)
1564 {
1565         struct ring_buffer *buffer = buf->buffer;
1566
1567         if (!buffer)
1568                 return;
1569
1570         ring_buffer_record_disable(buffer);
1571
1572         /* Make sure all commits have finished */
1573         synchronize_sched();
1574         ring_buffer_reset_cpu(buffer, cpu);
1575
1576         ring_buffer_record_enable(buffer);
1577 }
1578
1579 void tracing_reset_online_cpus(struct trace_buffer *buf)
1580 {
1581         struct ring_buffer *buffer = buf->buffer;
1582         int cpu;
1583
1584         if (!buffer)
1585                 return;
1586
1587         ring_buffer_record_disable(buffer);
1588
1589         /* Make sure all commits have finished */
1590         synchronize_sched();
1591
1592         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1593
1594         for_each_online_cpu(cpu)
1595                 ring_buffer_reset_cpu(buffer, cpu);
1596
1597         ring_buffer_record_enable(buffer);
1598 }
1599
1600 /* Must have trace_types_lock held */
1601 void tracing_reset_all_online_cpus(void)
1602 {
1603         struct trace_array *tr;
1604
1605         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1606                 tracing_reset_online_cpus(&tr->trace_buffer);
1607 #ifdef CONFIG_TRACER_MAX_TRACE
1608                 tracing_reset_online_cpus(&tr->max_buffer);
1609 #endif
1610         }
1611 }
1612
1613 #define SAVED_CMDLINES_DEFAULT 128
1614 #define NO_CMDLINE_MAP UINT_MAX
1615 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1616 struct saved_cmdlines_buffer {
1617         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1618         unsigned *map_cmdline_to_pid;
1619         unsigned cmdline_num;
1620         int cmdline_idx;
1621         char *saved_cmdlines;
1622 };
1623 static struct saved_cmdlines_buffer *savedcmd;
1624
1625 static inline char *get_saved_cmdlines(int idx)
1626 {
1627         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1628 }
1629
1630 static inline void set_cmdline(int idx, const char *cmdline)
1631 {
1632         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1633 }
1634
1635 static int allocate_cmdlines_buffer(unsigned int val,
1636                                     struct saved_cmdlines_buffer *s)
1637 {
1638         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1639                                         GFP_KERNEL);
1640         if (!s->map_cmdline_to_pid)
1641                 return -ENOMEM;
1642
1643         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1644         if (!s->saved_cmdlines) {
1645                 kfree(s->map_cmdline_to_pid);
1646                 return -ENOMEM;
1647         }
1648
1649         s->cmdline_idx = 0;
1650         s->cmdline_num = val;
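	/*
	 * Added note: memset() only uses the low byte of its value, but
	 * NO_CMDLINE_MAP is UINT_MAX (all 0xff bytes), so byte-filling
	 * with it still yields the intended "no mapping" value in every
	 * unsigned slot of both arrays below.
	 */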
1651         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1652                sizeof(s->map_pid_to_cmdline));
1653         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1654                val * sizeof(*s->map_cmdline_to_pid));
1655
1656         return 0;
1657 }
1658
1659 static int trace_create_savedcmd(void)
1660 {
1661         int ret;
1662
1663         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1664         if (!savedcmd)
1665                 return -ENOMEM;
1666
1667         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1668         if (ret < 0) {
1669                 kfree(savedcmd);
1670                 savedcmd = NULL;
1671                 return -ENOMEM;
1672         }
1673
1674         return 0;
1675 }
1676
1677 int is_tracing_stopped(void)
1678 {
1679         return global_trace.stop_count;
1680 }
1681
1682 /**
1683  * tracing_start - quick start of the tracer
1684  *
1685  * If tracing is enabled but was stopped by tracing_stop,
1686  * this will start the tracer back up.
1687  */
1688 void tracing_start(void)
1689 {
1690         struct ring_buffer *buffer;
1691         unsigned long flags;
1692
1693         if (tracing_disabled)
1694                 return;
1695
1696         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1697         if (--global_trace.stop_count) {
1698                 if (global_trace.stop_count < 0) {
1699                         /* Someone screwed up their debugging */
1700                         WARN_ON_ONCE(1);
1701                         global_trace.stop_count = 0;
1702                 }
1703                 goto out;
1704         }
1705
1706         /* Prevent the buffers from switching */
1707         arch_spin_lock(&global_trace.max_lock);
1708
1709         buffer = global_trace.trace_buffer.buffer;
1710         if (buffer)
1711                 ring_buffer_record_enable(buffer);
1712
1713 #ifdef CONFIG_TRACER_MAX_TRACE
1714         buffer = global_trace.max_buffer.buffer;
1715         if (buffer)
1716                 ring_buffer_record_enable(buffer);
1717 #endif
1718
1719         arch_spin_unlock(&global_trace.max_lock);
1720
1721  out:
1722         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1723 }
1724
1725 static void tracing_start_tr(struct trace_array *tr)
1726 {
1727         struct ring_buffer *buffer;
1728         unsigned long flags;
1729
1730         if (tracing_disabled)
1731                 return;
1732
1733         /* If global, we need to also start the max tracer */
1734         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1735                 return tracing_start();
1736
1737         raw_spin_lock_irqsave(&tr->start_lock, flags);
1738
1739         if (--tr->stop_count) {
1740                 if (tr->stop_count < 0) {
1741                         /* Someone screwed up their debugging */
1742                         WARN_ON_ONCE(1);
1743                         tr->stop_count = 0;
1744                 }
1745                 goto out;
1746         }
1747
1748         buffer = tr->trace_buffer.buffer;
1749         if (buffer)
1750                 ring_buffer_record_enable(buffer);
1751
1752  out:
1753         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1754 }
1755
1756 /**
1757  * tracing_stop - quick stop of the tracer
1758  *
1759  * Light weight way to stop tracing. Use in conjunction with
1760  * tracing_start.
1761  */
1762 void tracing_stop(void)
1763 {
1764         struct ring_buffer *buffer;
1765         unsigned long flags;
1766
1767         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1768         if (global_trace.stop_count++)
1769                 goto out;
1770
1771         /* Prevent the buffers from switching */
1772         arch_spin_lock(&global_trace.max_lock);
1773
1774         buffer = global_trace.trace_buffer.buffer;
1775         if (buffer)
1776                 ring_buffer_record_disable(buffer);
1777
1778 #ifdef CONFIG_TRACER_MAX_TRACE
1779         buffer = global_trace.max_buffer.buffer;
1780         if (buffer)
1781                 ring_buffer_record_disable(buffer);
1782 #endif
1783
1784         arch_spin_unlock(&global_trace.max_lock);
1785
1786  out:
1787         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1788 }
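
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): tracing_stop() and tracing_start() nest via stop_count,
 * so a debugging helper can bracket a region whose events it does not want
 * recorded. The helper name below is made up for the example.
 */
#if 0
static void example_untraced_section(void (*fn)(void))
{
	tracing_stop();		/* stop recording into the ring buffers */
	fn();			/* run the code of interest without recording */
	tracing_start();	/* resume recording */
}
#endif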
1789
1790 static void tracing_stop_tr(struct trace_array *tr)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         /* If global, we need to also stop the max tracer */
1796         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1797                 return tracing_stop();
1798
1799         raw_spin_lock_irqsave(&tr->start_lock, flags);
1800         if (tr->stop_count++)
1801                 goto out;
1802
1803         buffer = tr->trace_buffer.buffer;
1804         if (buffer)
1805                 ring_buffer_record_disable(buffer);
1806
1807  out:
1808         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1809 }
1810
1811 void trace_stop_cmdline_recording(void);
1812
1813 static int trace_save_cmdline(struct task_struct *tsk)
1814 {
1815         unsigned tpid, idx;
1816
1817         /* treat recording of idle task as a success */
1818         if (!tsk->pid)
1819                 return 1;
1820
1821         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1822
1823         /*
1824          * It's not the end of the world if we don't get
1825          * the lock, but we also don't want to spin
1826          * nor do we want to disable interrupts,
1827          * so if we miss here, then better luck next time.
1828          */
1829         if (!arch_spin_trylock(&trace_cmdline_lock))
1830                 return 0;
1831
1832         idx = savedcmd->map_pid_to_cmdline[tpid];
1833         if (idx == NO_CMDLINE_MAP) {
1834                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1835
1836                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1837                 savedcmd->cmdline_idx = idx;
1838         }
1839
1840         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1841         set_cmdline(idx, tsk->comm);
1842
1843         arch_spin_unlock(&trace_cmdline_lock);
1844
1845         return 1;
1846 }
1847
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850         unsigned map;
1851         int tpid;
1852
1853         if (!pid) {
1854                 strcpy(comm, "<idle>");
1855                 return;
1856         }
1857
1858         if (WARN_ON_ONCE(pid < 0)) {
1859                 strcpy(comm, "<XXX>");
1860                 return;
1861         }
1862
1863         tpid = pid & (PID_MAX_DEFAULT - 1);
1864         map = savedcmd->map_pid_to_cmdline[tpid];
1865         if (map != NO_CMDLINE_MAP) {
1866                 tpid = savedcmd->map_cmdline_to_pid[map];
1867                 if (tpid == pid) {
1868                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1869                         return;
1870                 }
1871         }
1872         strcpy(comm, "<...>");
1873 }
1874
1875 void trace_find_cmdline(int pid, char comm[])
1876 {
1877         preempt_disable();
1878         arch_spin_lock(&trace_cmdline_lock);
1879
1880         __trace_find_cmdline(pid, comm);
1881
1882         arch_spin_unlock(&trace_cmdline_lock);
1883         preempt_enable();
1884 }
1885
1886 void tracing_record_cmdline(struct task_struct *tsk)
1887 {
1888         if (!__this_cpu_read(trace_cmdline_save))
1889                 return;
1890
1891         if (trace_save_cmdline(tsk))
1892                 __this_cpu_write(trace_cmdline_save, false);
1893 }
1894
1895 void
1896 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1897                              int pc)
1898 {
1899         struct task_struct *tsk = current;
1900
1901         entry->preempt_count            = pc & 0xff;
1902         entry->pid                      = (tsk) ? tsk->pid : 0;
1903         entry->flags =
1904 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1905                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1906 #else
1907                 TRACE_FLAG_IRQS_NOSUPPORT |
1908 #endif
1909                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1910                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1911                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1912                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1913                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1914 }
1915 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1916
1917 static __always_inline void
1918 trace_event_setup(struct ring_buffer_event *event,
1919                   int type, unsigned long flags, int pc)
1920 {
1921         struct trace_entry *ent = ring_buffer_event_data(event);
1922
1923         tracing_generic_entry_update(ent, flags, pc);
1924         ent->type = type;
1925 }
1926
1927 struct ring_buffer_event *
1928 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1929                           int type,
1930                           unsigned long len,
1931                           unsigned long flags, int pc)
1932 {
1933         struct ring_buffer_event *event;
1934
1935         event = ring_buffer_lock_reserve(buffer, len);
1936         if (event != NULL)
1937                 trace_event_setup(event, type, flags, pc);
1938
1939         return event;
1940 }
1941
1942 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1943 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1944 static int trace_buffered_event_ref;
1945
1946 /**
1947  * trace_buffered_event_enable - enable buffering events
1948  *
1949  * When events are being filtered, it is quicker to write the event
1950  * data into a temporary per-cpu buffer if there is a good chance that
1951  * the event will not be committed. Discarding a reserved ring-buffer
1952  * event is not as fast as committing it, and is much slower than
1953  * copying the data and committing it in one shot.
1954  *
1955  * When an event is to be filtered, allocate per-cpu buffers to write
1956  * the event data into. If the event is filtered it is simply dropped;
1957  * otherwise the entire data is copied into the ring buffer and
1958  * committed in one shot.
1959  */
1960 void trace_buffered_event_enable(void)
1961 {
1962         struct ring_buffer_event *event;
1963         struct page *page;
1964         int cpu;
1965
1966         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1967
1968         if (trace_buffered_event_ref++)
1969                 return;
1970
1971         for_each_tracing_cpu(cpu) {
1972                 page = alloc_pages_node(cpu_to_node(cpu),
1973                                         GFP_KERNEL | __GFP_NORETRY, 0);
1974                 if (!page)
1975                         goto failed;
1976
1977                 event = page_address(page);
1978                 memset(event, 0, sizeof(*event));
1979
1980                 per_cpu(trace_buffered_event, cpu) = event;
1981
1982                 preempt_disable();
1983                 if (cpu == smp_processor_id() &&
1984                     this_cpu_read(trace_buffered_event) !=
1985                     per_cpu(trace_buffered_event, cpu))
1986                         WARN_ON_ONCE(1);
1987                 preempt_enable();
1988         }
1989
1990         return;
1991  failed:
1992         trace_buffered_event_disable();
1993 }
1994
1995 static void enable_trace_buffered_event(void *data)
1996 {
1997         /* Probably not needed, but do it anyway */
1998         smp_rmb();
1999         this_cpu_dec(trace_buffered_event_cnt);
2000 }
2001
2002 static void disable_trace_buffered_event(void *data)
2003 {
2004         this_cpu_inc(trace_buffered_event_cnt);
2005 }
2006
2007 /**
2008  * trace_buffered_event_disable - disable buffering events
2009  *
2010  * When a filter is removed, it is faster to not use the buffered
2011  * events, and to commit directly into the ring buffer. Free up
2012  * the temp buffers when there are no more users. This requires
2013  * special synchronization with current events.
2014  */
2015 void trace_buffered_event_disable(void)
2016 {
2017         int cpu;
2018
2019         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2020
2021         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2022                 return;
2023
2024         if (--trace_buffered_event_ref)
2025                 return;
2026
2027         preempt_disable();
2028         /* For each CPU, set the buffer as used. */
2029         smp_call_function_many(tracing_buffer_mask,
2030                                disable_trace_buffered_event, NULL, 1);
2031         preempt_enable();
2032
2033         /* Wait for all current users to finish */
2034         synchronize_sched();
2035
2036         for_each_tracing_cpu(cpu) {
2037                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2038                 per_cpu(trace_buffered_event, cpu) = NULL;
2039         }
2040         /*
2041          * Make sure trace_buffered_event is NULL before clearing
2042          * trace_buffered_event_cnt.
2043          */
2044         smp_wmb();
2045
2046         preempt_disable();
2047         /* Do the work on each cpu */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                enable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051 }
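
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): the enable/disable pair above is reference counted and
 * expects event_mutex to be held, so a caller toggling event filtering could
 * use it roughly like this. The helper name is made up.
 */
#if 0
static void example_toggle_filter_buffering(bool on)
{
	mutex_lock(&event_mutex);
	if (on)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif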
2052
2053 void
2054 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2055 {
2056         __this_cpu_write(trace_cmdline_save, true);
2057
2058         /* If this is the temp buffer, we need to commit fully */
2059         if (this_cpu_read(trace_buffered_event) == event) {
2060                 /* Length is in event->array[0] */
2061                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2062                 /* Release the temp buffer */
2063                 this_cpu_dec(trace_buffered_event_cnt);
2064         } else
2065                 ring_buffer_unlock_commit(buffer, event);
2066 }
2067
2068 static struct ring_buffer *temp_buffer;
2069
2070 struct ring_buffer_event *
2071 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2072                           struct trace_event_file *trace_file,
2073                           int type, unsigned long len,
2074                           unsigned long flags, int pc)
2075 {
2076         struct ring_buffer_event *entry;
2077         int val;
2078
2079         *current_rb = trace_file->tr->trace_buffer.buffer;
2080
2081         if ((trace_file->flags &
2082              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2083             (entry = this_cpu_read(trace_buffered_event))) {
2084                 /* Try to use the per cpu buffer first */
2085                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2086                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2087                         trace_event_setup(entry, type, flags, pc);
2088                         entry->array[0] = len;
2089                         return entry;
2090                 }
2091                 this_cpu_dec(trace_buffered_event_cnt);
2092         }
2093
2094         entry = trace_buffer_lock_reserve(*current_rb,
2095                                          type, len, flags, pc);
2096         /*
2097          * If tracing is off, but we have triggers enabled,
2098          * we still need to look at the event data. Use the temp_buffer
2099          * to store the trace event for the trigger to use. It's recursion
2100          * safe and will not be recorded anywhere.
2101          */
2102         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2103                 *current_rb = temp_buffer;
2104                 entry = trace_buffer_lock_reserve(*current_rb,
2105                                                   type, len, flags, pc);
2106         }
2107         return entry;
2108 }
2109 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2110
2111 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2112                                      struct ring_buffer *buffer,
2113                                      struct ring_buffer_event *event,
2114                                      unsigned long flags, int pc,
2115                                      struct pt_regs *regs)
2116 {
2117         __buffer_unlock_commit(buffer, event);
2118
2119         /*
2120          * If regs is not set, then skip the following callers:
2121          *   trace_buffer_unlock_commit_regs
2122          *   event_trigger_unlock_commit
2123          *   trace_event_buffer_commit
2124          *   trace_event_raw_event_sched_switch
2125          * Note, we can still get here via blktrace, wakeup tracer
2126          * and mmiotrace, but that's ok if they lose a function or
2127          * two. They are not that meaningful.
2128          */
2129         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2130         ftrace_trace_userstack(tr, buffer, flags, pc);
2131 }
2132
2133 void
2134 trace_function(struct trace_array *tr,
2135                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2136                int pc)
2137 {
2138         struct trace_event_call *call = &event_function;
2139         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2140         struct ring_buffer_event *event;
2141         struct ftrace_entry *entry;
2142
2143         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2144                                           flags, pc);
2145         if (!event)
2146                 return;
2147         entry   = ring_buffer_event_data(event);
2148         entry->ip                       = ip;
2149         entry->parent_ip                = parent_ip;
2150
2151         if (!call_filter_check_discard(call, entry, buffer, event))
2152                 __buffer_unlock_commit(buffer, event);
2153 }
2154
2155 #ifdef CONFIG_STACKTRACE
2156
2157 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2158 struct ftrace_stack {
2159         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2160 };
2161
2162 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2163 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2164
2165 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2166                                  unsigned long flags,
2167                                  int skip, int pc, struct pt_regs *regs)
2168 {
2169         struct trace_event_call *call = &event_kernel_stack;
2170         struct ring_buffer_event *event;
2171         struct stack_entry *entry;
2172         struct stack_trace trace;
2173         int use_stack;
2174         int size = FTRACE_STACK_ENTRIES;
2175
2176         trace.nr_entries        = 0;
2177         trace.skip              = skip;
2178
2179         /*
2180          * Add two, for this function and the call to save_stack_trace().
2181          * If regs is set, then these functions will not be in the way.
2182          */
2183         if (!regs)
2184                 trace.skip += 2;
2185
2186         /*
2187          * Since events can happen in NMIs there's no safe way to
2188          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
2189          * or NMI comes in, that nested context will just have to fall back
2190          * to the smaller FTRACE_STACK_ENTRIES written directly into the event.
2191          */
2192         preempt_disable_notrace();
2193
2194         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2195         /*
2196          * We don't need any atomic variables, just a barrier.
2197          * If an interrupt comes in, we don't care, because it would
2198          * have exited and put the counter back to what we want.
2199          * We just need a barrier to keep gcc from moving things
2200          * around.
2201          */
2202         barrier();
2203         if (use_stack == 1) {
2204                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2205                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2206
2207                 if (regs)
2208                         save_stack_trace_regs(regs, &trace);
2209                 else
2210                         save_stack_trace(&trace);
2211
2212                 if (trace.nr_entries > size)
2213                         size = trace.nr_entries;
2214         } else
2215                 /* From now on, use_stack is a boolean */
2216                 use_stack = 0;
2217
2218         size *= sizeof(unsigned long);
2219
2220         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2221                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2222                                     flags, pc);
2223         if (!event)
2224                 goto out;
2225         entry = ring_buffer_event_data(event);
2226
2227         memset(&entry->caller, 0, size);
2228
2229         if (use_stack)
2230                 memcpy(&entry->caller, trace.entries,
2231                        trace.nr_entries * sizeof(unsigned long));
2232         else {
2233                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2234                 trace.entries           = entry->caller;
2235                 if (regs)
2236                         save_stack_trace_regs(regs, &trace);
2237                 else
2238                         save_stack_trace(&trace);
2239         }
2240
2241         entry->size = trace.nr_entries;
2242
2243         if (!call_filter_check_discard(call, entry, buffer, event))
2244                 __buffer_unlock_commit(buffer, event);
2245
2246  out:
2247         /* Again, don't let gcc optimize things here */
2248         barrier();
2249         __this_cpu_dec(ftrace_stack_reserve);
2250         preempt_enable_notrace();
2251
2252 }
2253
2254 static inline void ftrace_trace_stack(struct trace_array *tr,
2255                                       struct ring_buffer *buffer,
2256                                       unsigned long flags,
2257                                       int skip, int pc, struct pt_regs *regs)
2258 {
2259         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2260                 return;
2261
2262         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2263 }
2264
2265 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2266                    int pc)
2267 {
2268         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2269 }
2270
2271 /**
2272  * trace_dump_stack - record a stack back trace in the trace buffer
2273  * @skip: Number of functions to skip (helper handlers)
2274  */
2275 void trace_dump_stack(int skip)
2276 {
2277         unsigned long flags;
2278
2279         if (tracing_disabled || tracing_selftest_running)
2280                 return;
2281
2282         local_save_flags(flags);
2283
2284         /*
2285          * Skip 3 more; that seems to get us to the caller of
2286          * this function.
2287          */
2288         skip += 3;
2289         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2290                              flags, skip, preempt_count(), NULL);
2291 }
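
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): any kernel code that wants its caller's backtrace in
 * the trace buffer rather than on the console can simply call
 * trace_dump_stack(). The helper name below is made up.
 */
#if 0
static void example_record_backtrace(void)
{
	/* skip == 0: record from the immediate caller down */
	trace_dump_stack(0);
}
#endif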
2292
2293 static DEFINE_PER_CPU(int, user_stack_count);
2294
2295 void
2296 ftrace_trace_userstack(struct trace_array *tr,
2297                        struct ring_buffer *buffer, unsigned long flags, int pc)
2298 {
2299         struct trace_event_call *call = &event_user_stack;
2300         struct ring_buffer_event *event;
2301         struct userstack_entry *entry;
2302         struct stack_trace trace;
2303
2304         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2305                 return;
2306
2307         /*
2308          * NMIs cannot handle page faults, even with fixups.
2309          * Saving the user stack can (and often does) fault.
2310          */
2311         if (unlikely(in_nmi()))
2312                 return;
2313
2314         /*
2315          * prevent recursion, since the user stack tracing may
2316          * trigger other kernel events.
2317          */
2318         preempt_disable();
2319         if (__this_cpu_read(user_stack_count))
2320                 goto out;
2321
2322         __this_cpu_inc(user_stack_count);
2323
2324         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2325                                           sizeof(*entry), flags, pc);
2326         if (!event)
2327                 goto out_drop_count;
2328         entry   = ring_buffer_event_data(event);
2329
2330         entry->tgid             = current->tgid;
2331         memset(&entry->caller, 0, sizeof(entry->caller));
2332
2333         trace.nr_entries        = 0;
2334         trace.max_entries       = FTRACE_STACK_ENTRIES;
2335         trace.skip              = 0;
2336         trace.entries           = entry->caller;
2337
2338         save_stack_trace_user(&trace);
2339         if (!call_filter_check_discard(call, entry, buffer, event))
2340                 __buffer_unlock_commit(buffer, event);
2341
2342  out_drop_count:
2343         __this_cpu_dec(user_stack_count);
2344  out:
2345         preempt_enable();
2346 }
2347
2348 #ifdef UNUSED
2349 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2350 {
2351         ftrace_trace_userstack(tr, flags, preempt_count());
2352 }
2353 #endif /* UNUSED */
2354
2355 #endif /* CONFIG_STACKTRACE */
2356
2357 /* created for use with alloc_percpu */
2358 struct trace_buffer_struct {
2359         int nesting;
2360         char buffer[4][TRACE_BUF_SIZE];
2361 };
2362
2363 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2364
2365 /*
2366  * This allows for lockless recording.  If we're nested too deeply, then
2367  * this returns NULL.
2368  */
2369 static char *get_trace_buf(void)
2370 {
2371         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2372
2373         if (!trace_percpu_buffer || buffer->nesting >= 4)
2374                 return NULL;
2375
2376         buffer->nesting++;
2377
2378         /* Interrupts must see nesting incremented before we use the buffer */
2379         barrier();
2380         return &buffer->buffer[buffer->nesting - 1][0];
2381 }
2382
2383 static void put_trace_buf(void)
2384 {
2385         /* Don't let the decrement of nesting leak before this */
2386         barrier();
2387         this_cpu_dec(trace_percpu_buffer->nesting);
2388 }
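
/*
 * Callers of get_trace_buf()/put_trace_buf() must keep preemption disabled
 * between the two calls (the buffer and its nesting counter are per-cpu)
 * and must balance every successful get with a put, as trace_vbprintk()
 * and __trace_array_vprintk() below do.
 */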
2389
2390 static int alloc_percpu_trace_buffer(void)
2391 {
2392         struct trace_buffer_struct __percpu *buffers;
2393
2394         buffers = alloc_percpu(struct trace_buffer_struct);
2395         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2396                 return -ENOMEM;
2397
2398         trace_percpu_buffer = buffers;
2399         return 0;
2400 }
2401
2402 static int buffers_allocated;
2403
2404 void trace_printk_init_buffers(void)
2405 {
2406         if (buffers_allocated)
2407                 return;
2408
2409         if (alloc_percpu_trace_buffer())
2410                 return;
2411
2412         /* trace_printk() is for debug use only. Don't use it in production. */
2413
2414         pr_warn("\n");
2415         pr_warn("**********************************************************\n");
2416         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417         pr_warn("**                                                      **\n");
2418         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2419         pr_warn("**                                                      **\n");
2420         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2421         pr_warn("** unsafe for production use.                           **\n");
2422         pr_warn("**                                                      **\n");
2423         pr_warn("** If you see this message and you are not debugging    **\n");
2424         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2425         pr_warn("**                                                      **\n");
2426         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2427         pr_warn("**********************************************************\n");
2428
2429         /* Expand the buffers to set size */
2430         tracing_update_buffers();
2431
2432         buffers_allocated = 1;
2433
2434         /*
2435          * trace_printk_init_buffers() can be called by modules.
2436          * If that happens, then we need to start cmdline recording
2437          * directly here. If global_trace.trace_buffer.buffer is already
2438          * allocated here, then this was called by module code.
2439          */
2440         if (global_trace.trace_buffer.buffer)
2441                 tracing_start_cmdline_record();
2442 }
2443
2444 void trace_printk_start_comm(void)
2445 {
2446         /* Start tracing comms if trace printk is set */
2447         if (!buffers_allocated)
2448                 return;
2449         tracing_start_cmdline_record();
2450 }
2451
2452 static void trace_printk_start_stop_comm(int enabled)
2453 {
2454         if (!buffers_allocated)
2455                 return;
2456
2457         if (enabled)
2458                 tracing_start_cmdline_record();
2459         else
2460                 tracing_stop_cmdline_record();
2461 }
2462
2463 /**
2464  * trace_vbprintk - write binary msg to tracing buffer
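 * @ip: The address of the caller
 * @fmt: The format string to write into the buffer
 * @args: Arguments for @fmt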
2465  *
2466  */
2467 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2468 {
2469         struct trace_event_call *call = &event_bprint;
2470         struct ring_buffer_event *event;
2471         struct ring_buffer *buffer;
2472         struct trace_array *tr = &global_trace;
2473         struct bprint_entry *entry;
2474         unsigned long flags;
2475         char *tbuffer;
2476         int len = 0, size, pc;
2477
2478         if (unlikely(tracing_selftest_running || tracing_disabled))
2479                 return 0;
2480
2481         /* Don't pollute graph traces with trace_vprintk internals */
2482         pause_graph_tracing();
2483
2484         pc = preempt_count();
2485         preempt_disable_notrace();
2486
2487         tbuffer = get_trace_buf();
2488         if (!tbuffer) {
2489                 len = 0;
2490                 goto out_nobuffer;
2491         }
2492
2493         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2494
2495         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2496                 goto out;
2497
2498         local_save_flags(flags);
2499         size = sizeof(*entry) + sizeof(u32) * len;
2500         buffer = tr->trace_buffer.buffer;
2501         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2502                                           flags, pc);
2503         if (!event)
2504                 goto out;
2505         entry = ring_buffer_event_data(event);
2506         entry->ip                       = ip;
2507         entry->fmt                      = fmt;
2508
2509         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2510         if (!call_filter_check_discard(call, entry, buffer, event)) {
2511                 __buffer_unlock_commit(buffer, event);
2512                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2513         }
2514
2515 out:
2516         put_trace_buf();
2517
2518 out_nobuffer:
2519         preempt_enable_notrace();
2520         unpause_graph_tracing();
2521
2522         return len;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_vbprintk);
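
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): trace_vbprintk() is not normally called directly; a
 * trace_printk() with a constant format string and arguments is typically
 * routed here through the binary-printk path. The helper name is made up.
 */
#if 0
static void example_debug_value(unsigned long val)
{
	trace_printk("value is %lu\n", val);
}
#endif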
2525
2526 __printf(3, 0)
2527 static int
2528 __trace_array_vprintk(struct ring_buffer *buffer,
2529                       unsigned long ip, const char *fmt, va_list args)
2530 {
2531         struct trace_event_call *call = &event_print;
2532         struct ring_buffer_event *event;
2533         int len = 0, size, pc;
2534         struct print_entry *entry;
2535         unsigned long flags;
2536         char *tbuffer;
2537
2538         if (tracing_disabled || tracing_selftest_running)
2539                 return 0;
2540
2541         /* Don't pollute graph traces with trace_vprintk internals */
2542         pause_graph_tracing();
2543
2544         pc = preempt_count();
2545         preempt_disable_notrace();
2546
2547
2548         tbuffer = get_trace_buf();
2549         if (!tbuffer) {
2550                 len = 0;
2551                 goto out_nobuffer;
2552         }
2553
2554         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2555
2556         local_save_flags(flags);
2557         size = sizeof(*entry) + len + 1;
2558         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2559                                           flags, pc);
2560         if (!event)
2561                 goto out;
2562         entry = ring_buffer_event_data(event);
2563         entry->ip = ip;
2564
2565         memcpy(&entry->buf, tbuffer, len + 1);
2566         if (!call_filter_check_discard(call, entry, buffer, event)) {
2567                 __buffer_unlock_commit(buffer, event);
2568                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2569         }
2570
2571 out:
2572         put_trace_buf();
2573
2574 out_nobuffer:
2575         preempt_enable_notrace();
2576         unpause_graph_tracing();
2577
2578         return len;
2579 }
2580
2581 __printf(3, 0)
2582 int trace_array_vprintk(struct trace_array *tr,
2583                         unsigned long ip, const char *fmt, va_list args)
2584 {
2585         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2586 }
2587
2588 __printf(3, 0)
2589 int trace_array_printk(struct trace_array *tr,
2590                        unsigned long ip, const char *fmt, ...)
2591 {
2592         int ret;
2593         va_list ap;
2594
2595         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2596                 return 0;
2597
2598         if (!tr)
2599                 return -ENOENT;
2600
2601         va_start(ap, fmt);
2602         ret = trace_array_vprintk(tr, ip, fmt, ap);
2603         va_end(ap);
2604         return ret;
2605 }
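
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): code holding a pointer to a specific trace instance
 * ("tr" is assumed to have been obtained elsewhere) can write a formatted
 * message into that instance's buffer. The helper name is made up.
 */
#if 0
static void example_instance_printk(struct trace_array *tr)
{
	trace_array_printk(tr, _THIS_IP_, "example event: %d\n", 42);
}
#endif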
2606
2607 __printf(3, 4)
2608 int trace_array_printk_buf(struct ring_buffer *buffer,
2609                            unsigned long ip, const char *fmt, ...)
2610 {
2611         int ret;
2612         va_list ap;
2613
2614         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2615                 return 0;
2616
2617         va_start(ap, fmt);
2618         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2619         va_end(ap);
2620         return ret;
2621 }
2622
2623 __printf(2, 0)
2624 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2625 {
2626         return trace_array_vprintk(&global_trace, ip, fmt, args);
2627 }
2628 EXPORT_SYMBOL_GPL(trace_vprintk);
2629
2630 static void trace_iterator_increment(struct trace_iterator *iter)
2631 {
2632         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2633
2634         iter->idx++;
2635         if (buf_iter)
2636                 ring_buffer_read(buf_iter, NULL);
2637 }
2638
2639 static struct trace_entry *
2640 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2641                 unsigned long *lost_events)
2642 {
2643         struct ring_buffer_event *event;
2644         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2645
2646         if (buf_iter)
2647                 event = ring_buffer_iter_peek(buf_iter, ts);
2648         else
2649                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2650                                          lost_events);
2651
2652         if (event) {
2653                 iter->ent_size = ring_buffer_event_length(event);
2654                 return ring_buffer_event_data(event);
2655         }
2656         iter->ent_size = 0;
2657         return NULL;
2658 }
2659
2660 static struct trace_entry *
2661 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2662                   unsigned long *missing_events, u64 *ent_ts)
2663 {
2664         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2665         struct trace_entry *ent, *next = NULL;
2666         unsigned long lost_events = 0, next_lost = 0;
2667         int cpu_file = iter->cpu_file;
2668         u64 next_ts = 0, ts;
2669         int next_cpu = -1;
2670         int next_size = 0;
2671         int cpu;
2672
2673         /*
2674          * If we are in a per_cpu trace file, don't bother iterating over
2675          * all CPUs; peek at that CPU directly.
2676          */
2677         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2678                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2679                         return NULL;
2680                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2681                 if (ent_cpu)
2682                         *ent_cpu = cpu_file;
2683
2684                 return ent;
2685         }
2686
2687         for_each_tracing_cpu(cpu) {
2688
2689                 if (ring_buffer_empty_cpu(buffer, cpu))
2690                         continue;
2691
2692                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2693
2694                 /*
2695                  * Pick the entry with the smallest timestamp:
2696                  */
2697                 if (ent && (!next || ts < next_ts)) {
2698                         next = ent;
2699                         next_cpu = cpu;
2700                         next_ts = ts;
2701                         next_lost = lost_events;
2702                         next_size = iter->ent_size;
2703                 }
2704         }
2705
2706         iter->ent_size = next_size;
2707
2708         if (ent_cpu)
2709                 *ent_cpu = next_cpu;
2710
2711         if (ent_ts)
2712                 *ent_ts = next_ts;
2713
2714         if (missing_events)
2715                 *missing_events = next_lost;
2716
2717         return next;
2718 }
2719
2720 /* Find the next real entry, without updating the iterator itself */
2721 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2722                                           int *ent_cpu, u64 *ent_ts)
2723 {
2724         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2725 }
2726
2727 /* Find the next real entry, and increment the iterator to the next entry */
2728 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2729 {
2730         iter->ent = __find_next_entry(iter, &iter->cpu,
2731                                       &iter->lost_events, &iter->ts);
2732
2733         if (iter->ent)
2734                 trace_iterator_increment(iter);
2735
2736         return iter->ent ? iter : NULL;
2737 }
2738
2739 static void trace_consume(struct trace_iterator *iter)
2740 {
2741         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2742                             &iter->lost_events);
2743 }
2744
2745 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2746 {
2747         struct trace_iterator *iter = m->private;
2748         int i = (int)*pos;
2749         void *ent;
2750
2751         WARN_ON_ONCE(iter->leftover);
2752
2753         (*pos)++;
2754
2755         /* can't go backwards */
2756         if (iter->idx > i)
2757                 return NULL;
2758
2759         if (iter->idx < 0)
2760                 ent = trace_find_next_entry_inc(iter);
2761         else
2762                 ent = iter;
2763
2764         while (ent && iter->idx < i)
2765                 ent = trace_find_next_entry_inc(iter);
2766
2767         iter->pos = *pos;
2768
2769         return ent;
2770 }
2771
2772 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2773 {
2774         struct ring_buffer_event *event;
2775         struct ring_buffer_iter *buf_iter;
2776         unsigned long entries = 0;
2777         u64 ts;
2778
2779         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2780
2781         buf_iter = trace_buffer_iter(iter, cpu);
2782         if (!buf_iter)
2783                 return;
2784
2785         ring_buffer_iter_reset(buf_iter);
2786
2787         /*
2788          * With the max latency tracers, it is possible that a reset
2789          * never took place on a CPU. This is evident when an entry's
2790          * timestamp is before the start time of the buffer.
2791          */
2792         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2793                 if (ts >= iter->trace_buffer->time_start)
2794                         break;
2795                 entries++;
2796                 ring_buffer_read(buf_iter, NULL);
2797         }
2798
2799         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2800 }
2801
2802 /*
2803  * The current tracer is copied to avoid taking a global lock
2804  * all around.
2805  */
2806 static void *s_start(struct seq_file *m, loff_t *pos)
2807 {
2808         struct trace_iterator *iter = m->private;
2809         struct trace_array *tr = iter->tr;
2810         int cpu_file = iter->cpu_file;
2811         void *p = NULL;
2812         loff_t l = 0;
2813         int cpu;
2814
2815         /*
2816          * copy the tracer to avoid using a global lock all around.
2817          * iter->trace is a copy of current_trace, the pointer to the
2818          * name may be used instead of a strcmp(), as iter->trace->name
2819          * will point to the same string as current_trace->name.
2820          */
2821         mutex_lock(&trace_types_lock);
2822         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2823                 *iter->trace = *tr->current_trace;
2824         mutex_unlock(&trace_types_lock);
2825
2826 #ifdef CONFIG_TRACER_MAX_TRACE
2827         if (iter->snapshot && iter->trace->use_max_tr)
2828                 return ERR_PTR(-EBUSY);
2829 #endif
2830
2831         if (*pos != iter->pos) {
2832                 iter->ent = NULL;
2833                 iter->cpu = 0;
2834                 iter->idx = -1;
2835
2836                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2837                         for_each_tracing_cpu(cpu)
2838                                 tracing_iter_reset(iter, cpu);
2839                 } else
2840                         tracing_iter_reset(iter, cpu_file);
2841
2842                 iter->leftover = 0;
2843                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2844                         ;
2845
2846         } else {
2847                 /*
2848                  * If we overflowed the seq_file before, then we want
2849                  * to just reuse the trace_seq buffer again.
2850                  */
2851                 if (iter->leftover)
2852                         p = iter;
2853                 else {
2854                         l = *pos - 1;
2855                         p = s_next(m, p, &l);
2856                 }
2857         }
2858
2859         trace_event_read_lock();
2860         trace_access_lock(cpu_file);
2861         return p;
2862 }
2863
2864 static void s_stop(struct seq_file *m, void *p)
2865 {
2866         struct trace_iterator *iter = m->private;
2867
2868 #ifdef CONFIG_TRACER_MAX_TRACE
2869         if (iter->snapshot && iter->trace->use_max_tr)
2870                 return;
2871 #endif
2872
2873         trace_access_unlock(iter->cpu_file);
2874         trace_event_read_unlock();
2875 }
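
/*
 * Illustrative sketch (not part of the original source and kept out of the
 * build with #if 0): s_start(), s_next() and s_stop() above are the
 * seq_file iteration callbacks for the "trace" file; together with a show
 * callback they are wired up roughly like this (the real seq_operations
 * structure appears further down in this file).
 */
#if 0
static const struct seq_operations example_trace_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,	/* defined later in this file */
};
#endif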
2876
2877 static void
2878 get_total_entries(struct trace_buffer *buf,
2879                   unsigned long *total, unsigned long *entries)
2880 {
2881         unsigned long count;
2882         int cpu;
2883
2884         *total = 0;
2885         *entries = 0;
2886
2887         for_each_tracing_cpu(cpu) {
2888                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2889                 /*
2890                  * If this buffer has skipped entries, then we hold all
2891                  * entries for the trace and we need to ignore the
2892                  * ones before the time stamp.
2893                  */
2894                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2895                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2896                         /* total is the same as the entries */
2897                         *total += count;
2898                 } else
2899                         *total += count +
2900                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2901                 *entries += count;
2902         }
2903 }
2904
2905 static void print_lat_help_header(struct seq_file *m)
2906 {
2907         seq_puts(m, "#                  _------=> CPU#            \n"
2908                     "#                 / _-----=> irqs-off        \n"
2909                     "#                | / _----=> need-resched    \n"
2910                     "#                || / _---=> hardirq/softirq \n"
2911                     "#                ||| / _--=> preempt-depth   \n"
2912                     "#                |||| /     delay            \n"
2913                     "#  cmd     pid   ||||| time  |   caller      \n"
2914                     "#     \\   /      |||||  \\    |   /         \n");
2915 }
2916
2917 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2918 {
2919         unsigned long total;
2920         unsigned long entries;
2921
2922         get_total_entries(buf, &total, &entries);
2923         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2924                    entries, total, num_online_cpus());
2925         seq_puts(m, "#\n");
2926 }
2927
2928 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2929 {
2930         print_event_info(buf, m);
2931         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2932                     "#              | |       |          |         |\n");
2933 }
2934
2935 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2936 {
2937         print_event_info(buf, m);
2938         seq_puts(m, "#                              _-----=> irqs-off\n"
2939                     "#                             / _----=> need-resched\n"
2940                     "#                            | / _---=> hardirq/softirq\n"
2941                     "#                            || / _--=> preempt-depth\n"
2942                     "#                            ||| /     delay\n"
2943                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2944                     "#              | |       |   ||||       |         |\n");
2945 }
2946
2947 void
2948 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2949 {
2950         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2951         struct trace_buffer *buf = iter->trace_buffer;
2952         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2953         struct tracer *type = iter->trace;
2954         unsigned long entries;
2955         unsigned long total;
2956         const char *name = "preemption";
2957
2958         name = type->name;
2959
2960         get_total_entries(buf, &total, &entries);
2961
2962         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2963                    name, UTS_RELEASE);
2964         seq_puts(m, "# -----------------------------------"
2965                  "---------------------------------\n");
2966         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2967                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2968                    nsecs_to_usecs(data->saved_latency),
2969                    entries,
2970                    total,
2971                    buf->cpu,
2972 #if defined(CONFIG_PREEMPT_NONE)
2973                    "server",
2974 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2975                    "desktop",
2976 #elif defined(CONFIG_PREEMPT)
2977                    "preempt",
2978 #else
2979                    "unknown",
2980 #endif
2981                    /* These are reserved for later use */
2982                    0, 0, 0, 0);
2983 #ifdef CONFIG_SMP
2984         seq_printf(m, " #P:%d)\n", num_online_cpus());
2985 #else
2986         seq_puts(m, ")\n");
2987 #endif
2988         seq_puts(m, "#    -----------------\n");
2989         seq_printf(m, "#    | task: %.16s-%d "
2990                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2991                    data->comm, data->pid,
2992                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2993                    data->policy, data->rt_priority);
2994         seq_puts(m, "#    -----------------\n");
2995
2996         if (data->critical_start) {
2997                 seq_puts(m, "#  => started at: ");
2998                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2999                 trace_print_seq(m, &iter->seq);
3000                 seq_puts(m, "\n#  => ended at:   ");
3001                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3002                 trace_print_seq(m, &iter->seq);
3003                 seq_puts(m, "\n#\n");
3004         }
3005
3006         seq_puts(m, "#\n");
3007 }
3008
3009 static void test_cpu_buff_start(struct trace_iterator *iter)
3010 {
3011         struct trace_seq *s = &iter->seq;
3012         struct trace_array *tr = iter->tr;
3013
3014         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3015                 return;
3016
3017         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3018                 return;
3019
3020         if (cpumask_available(iter->started) &&
3021             cpumask_test_cpu(iter->cpu, iter->started))
3022                 return;
3023
3024         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3025                 return;
3026
3027         if (cpumask_available(iter->started))
3028                 cpumask_set_cpu(iter->cpu, iter->started);
3029
3030         /* Don't print started cpu buffer for the first entry of the trace */
3031         if (iter->idx > 1)
3032                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3033                                 iter->cpu);
3034 }
3035
3036 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3037 {
3038         struct trace_array *tr = iter->tr;
3039         struct trace_seq *s = &iter->seq;
3040         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3041         struct trace_entry *entry;
3042         struct trace_event *event;
3043
3044         entry = iter->ent;
3045
3046         test_cpu_buff_start(iter);
3047
3048         event = ftrace_find_event(entry->type);
3049
3050         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3051                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3052                         trace_print_lat_context(iter);
3053                 else
3054                         trace_print_context(iter);
3055         }
3056
3057         if (trace_seq_has_overflowed(s))
3058                 return TRACE_TYPE_PARTIAL_LINE;
3059
3060         if (event)
3061                 return event->funcs->trace(iter, sym_flags, event);
3062
3063         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3064
3065         return trace_handle_return(s);
3066 }
3067
3068 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3069 {
3070         struct trace_array *tr = iter->tr;
3071         struct trace_seq *s = &iter->seq;
3072         struct trace_entry *entry;
3073         struct trace_event *event;
3074
3075         entry = iter->ent;
3076
3077         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3078                 trace_seq_printf(s, "%d %d %llu ",
3079                                  entry->pid, iter->cpu, iter->ts);
3080
3081         if (trace_seq_has_overflowed(s))
3082                 return TRACE_TYPE_PARTIAL_LINE;
3083
3084         event = ftrace_find_event(entry->type);
3085         if (event)
3086                 return event->funcs->raw(iter, 0, event);
3087
3088         trace_seq_printf(s, "%d ?\n", entry->type);
3089
3090         return trace_handle_return(s);
3091 }
3092
3093 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3094 {
3095         struct trace_array *tr = iter->tr;
3096         struct trace_seq *s = &iter->seq;
3097         unsigned char newline = '\n';
3098         struct trace_entry *entry;
3099         struct trace_event *event;
3100
3101         entry = iter->ent;
3102
3103         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3104                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3105                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3106                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3107                 if (trace_seq_has_overflowed(s))
3108                         return TRACE_TYPE_PARTIAL_LINE;
3109         }
3110
3111         event = ftrace_find_event(entry->type);
3112         if (event) {
3113                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3114                 if (ret != TRACE_TYPE_HANDLED)
3115                         return ret;
3116         }
3117
3118         SEQ_PUT_FIELD(s, newline);
3119
3120         return trace_handle_return(s);
3121 }
3122
3123 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3124 {
3125         struct trace_array *tr = iter->tr;
3126         struct trace_seq *s = &iter->seq;
3127         struct trace_entry *entry;
3128         struct trace_event *event;
3129
3130         entry = iter->ent;
3131
3132         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3133                 SEQ_PUT_FIELD(s, entry->pid);
3134                 SEQ_PUT_FIELD(s, iter->cpu);
3135                 SEQ_PUT_FIELD(s, iter->ts);
3136                 if (trace_seq_has_overflowed(s))
3137                         return TRACE_TYPE_PARTIAL_LINE;
3138         }
3139
3140         event = ftrace_find_event(entry->type);
3141         return event ? event->funcs->binary(iter, 0, event) :
3142                 TRACE_TYPE_HANDLED;
3143 }
3144
3145 int trace_empty(struct trace_iterator *iter)
3146 {
3147         struct ring_buffer_iter *buf_iter;
3148         int cpu;
3149
3150         /* If we are looking at one CPU buffer, only check that one */
3151         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3152                 cpu = iter->cpu_file;
3153                 buf_iter = trace_buffer_iter(iter, cpu);
3154                 if (buf_iter) {
3155                         if (!ring_buffer_iter_empty(buf_iter))
3156                                 return 0;
3157                 } else {
3158                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3159                                 return 0;
3160                 }
3161                 return 1;
3162         }
3163
3164         for_each_tracing_cpu(cpu) {
3165                 buf_iter = trace_buffer_iter(iter, cpu);
3166                 if (buf_iter) {
3167                         if (!ring_buffer_iter_empty(buf_iter))
3168                                 return 0;
3169                 } else {
3170                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3171                                 return 0;
3172                 }
3173         }
3174
3175         return 1;
3176 }
3177
3178 /*  Called with trace_event_read_lock() held. */
3179 enum print_line_t print_trace_line(struct trace_iterator *iter)
3180 {
3181         struct trace_array *tr = iter->tr;
3182         unsigned long trace_flags = tr->trace_flags;
3183         enum print_line_t ret;
3184
3185         if (iter->lost_events) {
3186                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3187                                  iter->cpu, iter->lost_events);
3188                 if (trace_seq_has_overflowed(&iter->seq))
3189                         return TRACE_TYPE_PARTIAL_LINE;
3190         }
3191
3192         if (iter->trace && iter->trace->print_line) {
3193                 ret = iter->trace->print_line(iter);
3194                 if (ret != TRACE_TYPE_UNHANDLED)
3195                         return ret;
3196         }
3197
3198         if (iter->ent->type == TRACE_BPUTS &&
3199                         trace_flags & TRACE_ITER_PRINTK &&
3200                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3201                 return trace_print_bputs_msg_only(iter);
3202
3203         if (iter->ent->type == TRACE_BPRINT &&
3204                         trace_flags & TRACE_ITER_PRINTK &&
3205                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3206                 return trace_print_bprintk_msg_only(iter);
3207
3208         if (iter->ent->type == TRACE_PRINT &&
3209                         trace_flags & TRACE_ITER_PRINTK &&
3210                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3211                 return trace_print_printk_msg_only(iter);
3212
3213         if (trace_flags & TRACE_ITER_BIN)
3214                 return print_bin_fmt(iter);
3215
3216         if (trace_flags & TRACE_ITER_HEX)
3217                 return print_hex_fmt(iter);
3218
3219         if (trace_flags & TRACE_ITER_RAW)
3220                 return print_raw_fmt(iter);
3221
3222         return print_trace_fmt(iter);
3223 }
3224
3225 void trace_latency_header(struct seq_file *m)
3226 {
3227         struct trace_iterator *iter = m->private;
3228         struct trace_array *tr = iter->tr;
3229
3230         /* print nothing if the buffers are empty */
3231         if (trace_empty(iter))
3232                 return;
3233
3234         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3235                 print_trace_header(m, iter);
3236
3237         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3238                 print_lat_help_header(m);
3239 }
3240
3241 void trace_default_header(struct seq_file *m)
3242 {
3243         struct trace_iterator *iter = m->private;
3244         struct trace_array *tr = iter->tr;
3245         unsigned long trace_flags = tr->trace_flags;
3246
3247         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3248                 return;
3249
3250         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3251                 /* print nothing if the buffers are empty */
3252                 if (trace_empty(iter))
3253                         return;
3254                 print_trace_header(m, iter);
3255                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3256                         print_lat_help_header(m);
3257         } else {
3258                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3259                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3260                                 print_func_help_header_irq(iter->trace_buffer, m);
3261                         else
3262                                 print_func_help_header(iter->trace_buffer, m);
3263                 }
3264         }
3265 }
3266
3267 static void test_ftrace_alive(struct seq_file *m)
3268 {
3269         if (!ftrace_is_dead())
3270                 return;
3271         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3272                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3273 }
3274
3275 #ifdef CONFIG_TRACER_MAX_TRACE
3276 static void show_snapshot_main_help(struct seq_file *m)
3277 {
3278         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3279                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3280                     "#                      Takes a snapshot of the main buffer.\n"
3281                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3282                     "#                      (Doesn't have to be '2'; works with any number that\n"
3283                     "#                       is not a '0' or '1')\n");
3284 }
3285
3286 static void show_snapshot_percpu_help(struct seq_file *m)
3287 {
3288         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3289 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3290         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3291                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3292 #else
3293         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3294                     "#                     Must use main snapshot file to allocate.\n");
3295 #endif
3296         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3297                     "#                      (Doesn't have to be '2'; works with any number that\n"
3298                     "#                       is not a '0' or '1')\n");
3299 }
3300
3301 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3302 {
3303         if (iter->tr->allocated_snapshot)
3304                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3305         else
3306                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3307
3308         seq_puts(m, "# Snapshot commands:\n");
3309         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3310                 show_snapshot_main_help(m);
3311         else
3312                 show_snapshot_percpu_help(m);
3313 }
3314 #else
3315 /* Should never be called */
3316 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3317 #endif
3318
3319 static int s_show(struct seq_file *m, void *v)
3320 {
3321         struct trace_iterator *iter = v;
3322         int ret;
3323
3324         if (iter->ent == NULL) {
3325                 if (iter->tr) {
3326                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3327                         seq_puts(m, "#\n");
3328                         test_ftrace_alive(m);
3329                 }
3330                 if (iter->snapshot && trace_empty(iter))
3331                         print_snapshot_help(m, iter);
3332                 else if (iter->trace && iter->trace->print_header)
3333                         iter->trace->print_header(m);
3334                 else
3335                         trace_default_header(m);
3336
3337         } else if (iter->leftover) {
3338                 /*
3339                  * If we filled the seq_file buffer earlier, we
3340                  * want to just show it now.
3341                  */
3342                 ret = trace_print_seq(m, &iter->seq);
3343
3344                 /* ret should be zero this time, but you never know */
3345                 iter->leftover = ret;
3346
3347         } else {
3348                 print_trace_line(iter);
3349                 ret = trace_print_seq(m, &iter->seq);
3350                 /*
3351                  * If we overflow the seq_file buffer, then it will
3352                  * ask us for this data again at start up.
3353                  * Use that instead.
3354                  *  ret is 0 if seq_file write succeeded.
3355                  *        -1 otherwise.
3356                  */
3357                 iter->leftover = ret;
3358         }
3359
3360         return 0;
3361 }
3362
3363 /*
3364  * Should be used after trace_array_get(), trace_types_lock
3365  * ensures that i_cdev was already initialized.
3366  */
3367 static inline int tracing_get_cpu(struct inode *inode)
3368 {
3369         if (inode->i_cdev) /* See trace_create_cpu_file() */
3370                 return (long)inode->i_cdev - 1;
3371         return RING_BUFFER_ALL_CPUS;
3372 }
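
/*
 * Illustrative note (editor's addition, not part of the original file):
 * the per-CPU trace files stash their CPU number in inode->i_cdev as
 * (cpu + 1), so the decoding above works out to:
 *
 *	i_cdev == NULL            ->  RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)(N + 1) ->  CPU N
 *
 * See trace_create_cpu_file() elsewhere in this file for the encoding
 * side; the "+ 1" is what lets NULL stand for "all CPUs".
 */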
3373
3374 static const struct seq_operations tracer_seq_ops = {
3375         .start          = s_start,
3376         .next           = s_next,
3377         .stop           = s_stop,
3378         .show           = s_show,
3379 };
3380
3381 static struct trace_iterator *
3382 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3383 {
3384         struct trace_array *tr = inode->i_private;
3385         struct trace_iterator *iter;
3386         int cpu;
3387
3388         if (tracing_disabled)
3389                 return ERR_PTR(-ENODEV);
3390
3391         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3392         if (!iter)
3393                 return ERR_PTR(-ENOMEM);
3394
3395         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3396                                     GFP_KERNEL);
3397         if (!iter->buffer_iter)
3398                 goto release;
3399
3400         /*
3401          * We make a copy of the current tracer to avoid concurrent
3402          * changes on it while we are reading.
3403          */
3404         mutex_lock(&trace_types_lock);
3405         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3406         if (!iter->trace)
3407                 goto fail;
3408
3409         *iter->trace = *tr->current_trace;
3410
3411         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3412                 goto fail;
3413
3414         iter->tr = tr;
3415
3416 #ifdef CONFIG_TRACER_MAX_TRACE
3417         /* Currently only the top directory has a snapshot */
3418         if (tr->current_trace->print_max || snapshot)
3419                 iter->trace_buffer = &tr->max_buffer;
3420         else
3421 #endif
3422                 iter->trace_buffer = &tr->trace_buffer;
3423         iter->snapshot = snapshot;
3424         iter->pos = -1;
3425         iter->cpu_file = tracing_get_cpu(inode);
3426         mutex_init(&iter->mutex);
3427
3428         /* Notify the tracer early; before we stop tracing. */
3429         if (iter->trace && iter->trace->open)
3430                 iter->trace->open(iter);
3431
3432         /* Annotate start of buffers if we had overruns */
3433         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3434                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3435
3436         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3437         if (trace_clocks[tr->clock_id].in_ns)
3438                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3439
3440         /* stop the trace while dumping if we are not opening "snapshot" */
3441         if (!iter->snapshot)
3442                 tracing_stop_tr(tr);
3443
3444         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3445                 for_each_tracing_cpu(cpu) {
3446                         iter->buffer_iter[cpu] =
3447                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3448                                                          cpu, GFP_KERNEL);
3449                 }
3450                 ring_buffer_read_prepare_sync();
3451                 for_each_tracing_cpu(cpu) {
3452                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3453                         tracing_iter_reset(iter, cpu);
3454                 }
3455         } else {
3456                 cpu = iter->cpu_file;
3457                 iter->buffer_iter[cpu] =
3458                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3459                                                  cpu, GFP_KERNEL);
3460                 ring_buffer_read_prepare_sync();
3461                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3462                 tracing_iter_reset(iter, cpu);
3463         }
3464
3465         mutex_unlock(&trace_types_lock);
3466
3467         return iter;
3468
3469  fail:
3470         mutex_unlock(&trace_types_lock);
3471         kfree(iter->trace);
3472         kfree(iter->buffer_iter);
3473 release:
3474         seq_release_private(inode, file);
3475         return ERR_PTR(-ENOMEM);
3476 }
3477
3478 int tracing_open_generic(struct inode *inode, struct file *filp)
3479 {
3480         if (tracing_disabled)
3481                 return -ENODEV;
3482
3483         filp->private_data = inode->i_private;
3484         return 0;
3485 }
3486
3487 bool tracing_is_disabled(void)
3488 {
3489         return (tracing_disabled) ? true : false;
3490 }
3491
3492 /*
3493  * Open and update trace_array ref count.
3494  * Must have the current trace_array passed to it.
3495  */
3496 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3497 {
3498         struct trace_array *tr = inode->i_private;
3499
3500         if (tracing_disabled)
3501                 return -ENODEV;
3502
3503         if (trace_array_get(tr) < 0)
3504                 return -ENODEV;
3505
3506         filp->private_data = inode->i_private;
3507
3508         return 0;
3509 }
3510
3511 static int tracing_release(struct inode *inode, struct file *file)
3512 {
3513         struct trace_array *tr = inode->i_private;
3514         struct seq_file *m = file->private_data;
3515         struct trace_iterator *iter;
3516         int cpu;
3517
3518         if (!(file->f_mode & FMODE_READ)) {
3519                 trace_array_put(tr);
3520                 return 0;
3521         }
3522
3523         /* Writes do not use seq_file */
3524         iter = m->private;
3525         mutex_lock(&trace_types_lock);
3526
3527         for_each_tracing_cpu(cpu) {
3528                 if (iter->buffer_iter[cpu])
3529                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3530         }
3531
3532         if (iter->trace && iter->trace->close)
3533                 iter->trace->close(iter);
3534
3535         if (!iter->snapshot)
3536                 /* reenable tracing if it was previously enabled */
3537                 tracing_start_tr(tr);
3538
3539         __trace_array_put(tr);
3540
3541         mutex_unlock(&trace_types_lock);
3542
3543         mutex_destroy(&iter->mutex);
3544         free_cpumask_var(iter->started);
3545         kfree(iter->trace);
3546         kfree(iter->buffer_iter);
3547         seq_release_private(inode, file);
3548
3549         return 0;
3550 }
3551
3552 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3553 {
3554         struct trace_array *tr = inode->i_private;
3555
3556         trace_array_put(tr);
3557         return 0;
3558 }
3559
3560 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3561 {
3562         struct trace_array *tr = inode->i_private;
3563
3564         trace_array_put(tr);
3565
3566         return single_release(inode, file);
3567 }
3568
3569 static int tracing_open(struct inode *inode, struct file *file)
3570 {
3571         struct trace_array *tr = inode->i_private;
3572         struct trace_iterator *iter;
3573         int ret = 0;
3574
3575         if (trace_array_get(tr) < 0)
3576                 return -ENODEV;
3577
3578         /* If this file was open for write, then erase contents */
3579         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3580                 int cpu = tracing_get_cpu(inode);
3581                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3582
3583 #ifdef CONFIG_TRACER_MAX_TRACE
3584                 if (tr->current_trace->print_max)
3585                         trace_buf = &tr->max_buffer;
3586 #endif
3587
3588                 if (cpu == RING_BUFFER_ALL_CPUS)
3589                         tracing_reset_online_cpus(trace_buf);
3590                 else
3591                         tracing_reset(trace_buf, cpu);
3592         }
3593
3594         if (file->f_mode & FMODE_READ) {
3595                 iter = __tracing_open(inode, file, false);
3596                 if (IS_ERR(iter))
3597                         ret = PTR_ERR(iter);
3598                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3599                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3600         }
3601
3602         if (ret < 0)
3603                 trace_array_put(tr);
3604
3605         return ret;
3606 }
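
/*
 * Illustrative usage (editor's addition): the open-time behaviour above is
 * what backs the usual shell idioms on the "trace" file, e.g.
 *
 *	# echo > trace        (write + O_TRUNC: resets the buffer(s))
 *	# cat trace           (read: builds an iterator via __tracing_open()
 *	                       and, unless "snapshot" is being read, pauses
 *	                       tracing until the file is released)
 */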
3607
3608 /*
3609  * Some tracers are not suitable for instance buffers.
3610  * A tracer is always available for the global array (toplevel)
3611  * or if it explicitly states that it is.
3612  */
3613 static bool
3614 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3615 {
3616         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3617 }
3618
3619 /* Find the next tracer that this trace array may use */
3620 static struct tracer *
3621 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3622 {
3623         while (t && !trace_ok_for_array(t, tr))
3624                 t = t->next;
3625
3626         return t;
3627 }
3628
3629 static void *
3630 t_next(struct seq_file *m, void *v, loff_t *pos)
3631 {
3632         struct trace_array *tr = m->private;
3633         struct tracer *t = v;
3634
3635         (*pos)++;
3636
3637         if (t)
3638                 t = get_tracer_for_array(tr, t->next);
3639
3640         return t;
3641 }
3642
3643 static void *t_start(struct seq_file *m, loff_t *pos)
3644 {
3645         struct trace_array *tr = m->private;
3646         struct tracer *t;
3647         loff_t l = 0;
3648
3649         mutex_lock(&trace_types_lock);
3650
3651         t = get_tracer_for_array(tr, trace_types);
3652         for (; t && l < *pos; t = t_next(m, t, &l))
3653                 ;
3654
3655         return t;
3656 }
3657
3658 static void t_stop(struct seq_file *m, void *p)
3659 {
3660         mutex_unlock(&trace_types_lock);
3661 }
3662
3663 static int t_show(struct seq_file *m, void *v)
3664 {
3665         struct tracer *t = v;
3666
3667         if (!t)
3668                 return 0;
3669
3670         seq_puts(m, t->name);
3671         if (t->next)
3672                 seq_putc(m, ' ');
3673         else
3674                 seq_putc(m, '\n');
3675
3676         return 0;
3677 }
3678
3679 static const struct seq_operations show_traces_seq_ops = {
3680         .start          = t_start,
3681         .next           = t_next,
3682         .stop           = t_stop,
3683         .show           = t_show,
3684 };
3685
3686 static int show_traces_open(struct inode *inode, struct file *file)
3687 {
3688         struct trace_array *tr = inode->i_private;
3689         struct seq_file *m;
3690         int ret;
3691
3692         if (tracing_disabled)
3693                 return -ENODEV;
3694
3695         if (trace_array_get(tr) < 0)
3696                 return -ENODEV;
3697
3698         ret = seq_open(file, &show_traces_seq_ops);
3699         if (ret) {
3700                 trace_array_put(tr);
3701                 return ret;
3702         }
3703
3704         m = file->private_data;
3705         m->private = tr;
3706
3707         return 0;
3708 }
3709
3710 static int show_traces_release(struct inode *inode, struct file *file)
3711 {
3712         struct trace_array *tr = inode->i_private;
3713
3714         trace_array_put(tr);
3715         return seq_release(inode, file);
3716 }
3717
3718 static ssize_t
3719 tracing_write_stub(struct file *filp, const char __user *ubuf,
3720                    size_t count, loff_t *ppos)
3721 {
3722         return count;
3723 }
3724
3725 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3726 {
3727         int ret;
3728
3729         if (file->f_mode & FMODE_READ)
3730                 ret = seq_lseek(file, offset, whence);
3731         else
3732                 file->f_pos = ret = 0;
3733
3734         return ret;
3735 }
3736
3737 static const struct file_operations tracing_fops = {
3738         .open           = tracing_open,
3739         .read           = seq_read,
3740         .write          = tracing_write_stub,
3741         .llseek         = tracing_lseek,
3742         .release        = tracing_release,
3743 };
3744
3745 static const struct file_operations show_traces_fops = {
3746         .open           = show_traces_open,
3747         .read           = seq_read,
3748         .llseek         = seq_lseek,
3749         .release        = show_traces_release,
3750 };
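
/*
 * Illustrative output (editor's addition): available_tracers is served by
 * the seq operations above; t_show() prints the usable tracers separated
 * by spaces, e.g. on one possible configuration:
 *
 *	# cat available_tracers
 *	function_graph function nop
 *
 * For instance (sub-buffer) directories, tracers that do not set
 * ->allow_instances are filtered out by trace_ok_for_array().
 */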
3751
3752 static ssize_t
3753 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3754                      size_t count, loff_t *ppos)
3755 {
3756         struct trace_array *tr = file_inode(filp)->i_private;
3757         char *mask_str;
3758         int len;
3759
3760         len = snprintf(NULL, 0, "%*pb\n",
3761                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3762         mask_str = kmalloc(len, GFP_KERNEL);
3763         if (!mask_str)
3764                 return -ENOMEM;
3765
3766         len = snprintf(mask_str, len, "%*pb\n",
3767                        cpumask_pr_args(tr->tracing_cpumask));
3768         if (len >= count) {
3769                 count = -EINVAL;
3770                 goto out_err;
3771         }
3772         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3773
3774 out_err:
3775         kfree(mask_str);
3776
3777         return count;
3778 }
3779
3780 static ssize_t
3781 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3782                       size_t count, loff_t *ppos)
3783 {
3784         struct trace_array *tr = file_inode(filp)->i_private;
3785         cpumask_var_t tracing_cpumask_new;
3786         int err, cpu;
3787
3788         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3789                 return -ENOMEM;
3790
3791         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3792         if (err)
3793                 goto err_unlock;
3794
3795         local_irq_disable();
3796         arch_spin_lock(&tr->max_lock);
3797         for_each_tracing_cpu(cpu) {
3798                 /*
3799                  * Increase/decrease the disabled counter if we are
3800                  * about to flip a bit in the cpumask:
3801                  */
3802                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3803                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3804                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3805                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3806                 }
3807                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3808                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3809                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3810                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3811                 }
3812         }
3813         arch_spin_unlock(&tr->max_lock);
3814         local_irq_enable();
3815
3816         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3817         free_cpumask_var(tracing_cpumask_new);
3818
3819         return count;
3820
3821 err_unlock:
3822         free_cpumask_var(tracing_cpumask_new);
3823
3824         return err;
3825 }
3826
3827 static const struct file_operations tracing_cpumask_fops = {
3828         .open           = tracing_open_generic_tr,
3829         .read           = tracing_cpumask_read,
3830         .write          = tracing_cpumask_write,
3831         .release        = tracing_release_generic_tr,
3832         .llseek         = generic_file_llseek,
3833 };
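
/*
 * Illustrative usage (editor's addition): tracing_cpumask uses the hex
 * cpumask format understood by cpumask_parse_user(), e.g. on an 8-CPU
 * machine:
 *
 *	# cat tracing_cpumask
 *	ff
 *	# echo 3 > tracing_cpumask        (trace only CPUs 0 and 1)
 *
 * Clearing a CPU's bit bumps its ->disabled counter and disables ring
 * buffer recording for that CPU; setting it again re-enables both.
 */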
3834
3835 static int tracing_trace_options_show(struct seq_file *m, void *v)
3836 {
3837         struct tracer_opt *trace_opts;
3838         struct trace_array *tr = m->private;
3839         u32 tracer_flags;
3840         int i;
3841
3842         mutex_lock(&trace_types_lock);
3843         tracer_flags = tr->current_trace->flags->val;
3844         trace_opts = tr->current_trace->flags->opts;
3845
3846         for (i = 0; trace_options[i]; i++) {
3847                 if (tr->trace_flags & (1 << i))
3848                         seq_printf(m, "%s\n", trace_options[i]);
3849                 else
3850                         seq_printf(m, "no%s\n", trace_options[i]);
3851         }
3852
3853         for (i = 0; trace_opts[i].name; i++) {
3854                 if (tracer_flags & trace_opts[i].bit)
3855                         seq_printf(m, "%s\n", trace_opts[i].name);
3856                 else
3857                         seq_printf(m, "no%s\n", trace_opts[i].name);
3858         }
3859         mutex_unlock(&trace_types_lock);
3860
3861         return 0;
3862 }
3863
3864 static int __set_tracer_option(struct trace_array *tr,
3865                                struct tracer_flags *tracer_flags,
3866                                struct tracer_opt *opts, int neg)
3867 {
3868         struct tracer *trace = tracer_flags->trace;
3869         int ret;
3870
3871         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3872         if (ret)
3873                 return ret;
3874
3875         if (neg)
3876                 tracer_flags->val &= ~opts->bit;
3877         else
3878                 tracer_flags->val |= opts->bit;
3879         return 0;
3880 }
3881
3882 /* Try to assign a tracer specific option */
3883 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3884 {
3885         struct tracer *trace = tr->current_trace;
3886         struct tracer_flags *tracer_flags = trace->flags;
3887         struct tracer_opt *opts = NULL;
3888         int i;
3889
3890         for (i = 0; tracer_flags->opts[i].name; i++) {
3891                 opts = &tracer_flags->opts[i];
3892
3893                 if (strcmp(cmp, opts->name) == 0)
3894                         return __set_tracer_option(tr, trace->flags, opts, neg);
3895         }
3896
3897         return -EINVAL;
3898 }
3899
3900 /* Some tracers require overwrite to stay enabled */
3901 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3902 {
3903         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3904                 return -1;
3905
3906         return 0;
3907 }
3908
3909 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3910 {
3911         /* do nothing if flag is already set */
3912         if (!!(tr->trace_flags & mask) == !!enabled)
3913                 return 0;
3914
3915         /* Give the tracer a chance to approve the change */
3916         if (tr->current_trace->flag_changed)
3917                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3918                         return -EINVAL;
3919
3920         if (enabled)
3921                 tr->trace_flags |= mask;
3922         else
3923                 tr->trace_flags &= ~mask;
3924
3925         if (mask == TRACE_ITER_RECORD_CMD)
3926                 trace_event_enable_cmd_record(enabled);
3927
3928         if (mask == TRACE_ITER_EVENT_FORK)
3929                 trace_event_follow_fork(tr, enabled);
3930
3931         if (mask == TRACE_ITER_OVERWRITE) {
3932                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3935 #endif
3936         }
3937
3938         if (mask == TRACE_ITER_PRINTK) {
3939                 trace_printk_start_stop_comm(enabled);
3940                 trace_printk_control(enabled);
3941         }
3942
3943         return 0;
3944 }
3945
3946 static int trace_set_options(struct trace_array *tr, char *option)
3947 {
3948         char *cmp;
3949         int neg = 0;
3950         int ret = -ENODEV;
3951         int i;
3952         size_t orig_len = strlen(option);
3953
3954         cmp = strstrip(option);
3955
3956         if (strncmp(cmp, "no", 2) == 0) {
3957                 neg = 1;
3958                 cmp += 2;
3959         }
3960
3961         mutex_lock(&trace_types_lock);
3962
3963         for (i = 0; trace_options[i]; i++) {
3964                 if (strcmp(cmp, trace_options[i]) == 0) {
3965                         ret = set_tracer_flag(tr, 1 << i, !neg);
3966                         break;
3967                 }
3968         }
3969
3970         /* If no option could be set, test the specific tracer options */
3971         if (!trace_options[i])
3972                 ret = set_tracer_option(tr, cmp, neg);
3973
3974         mutex_unlock(&trace_types_lock);
3975
3976         /*
3977          * If the first trailing whitespace is replaced with '\0' by strstrip,
3978          * turn it back into a space.
3979          */
3980         if (orig_len > strlen(option))
3981                 option[strlen(option)] = ' ';
3982
3983         return ret;
3984 }
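
/*
 * Illustrative examples (editor's addition): trace_set_options() takes a
 * single option token, optionally prefixed with "no" to clear it, e.g.
 *
 *	# echo sym-offset   > trace_options        (set "sym-offset")
 *	# echo nosym-offset > trace_options        (clear it again)
 *
 * Tokens that do not match a core option in trace_options[] fall through
 * to the current tracer's private flags via set_tracer_option().
 */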
3985
3986 static void __init apply_trace_boot_options(void)
3987 {
3988         char *buf = trace_boot_options_buf;
3989         char *option;
3990
3991         while (true) {
3992                 option = strsep(&buf, ",");
3993
3994                 if (!option)
3995                         break;
3996
3997                 if (*option)
3998                         trace_set_options(&global_trace, option);
3999
4000                 /* Put back the comma to allow this to be called again */
4001                 if (buf)
4002                         *(buf - 1) = ',';
4003         }
4004 }
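
/*
 * Illustrative example (editor's addition): trace_boot_options_buf is
 * filled from the "trace_options=" kernel command line parameter, so a
 * boot line such as
 *
 *	trace_options=sym-offset,noirq-info
 *
 * hands "sym-offset" and "noirq-info" to trace_set_options() one at a
 * time; the commas are put back so the buffer can be walked again later.
 */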
4005
4006 static ssize_t
4007 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4008                         size_t cnt, loff_t *ppos)
4009 {
4010         struct seq_file *m = filp->private_data;
4011         struct trace_array *tr = m->private;
4012         char buf[64];
4013         int ret;
4014
4015         if (cnt >= sizeof(buf))
4016                 return -EINVAL;
4017
4018         if (copy_from_user(buf, ubuf, cnt))
4019                 return -EFAULT;
4020
4021         buf[cnt] = 0;
4022
4023         ret = trace_set_options(tr, buf);
4024         if (ret < 0)
4025                 return ret;
4026
4027         *ppos += cnt;
4028
4029         return cnt;
4030 }
4031
4032 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4033 {
4034         struct trace_array *tr = inode->i_private;
4035         int ret;
4036
4037         if (tracing_disabled)
4038                 return -ENODEV;
4039
4040         if (trace_array_get(tr) < 0)
4041                 return -ENODEV;
4042
4043         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4044         if (ret < 0)
4045                 trace_array_put(tr);
4046
4047         return ret;
4048 }
4049
4050 static const struct file_operations tracing_iter_fops = {
4051         .open           = tracing_trace_options_open,
4052         .read           = seq_read,
4053         .llseek         = seq_lseek,
4054         .release        = tracing_single_release_tr,
4055         .write          = tracing_trace_options_write,
4056 };
4057
4058 static const char readme_msg[] =
4059         "tracing mini-HOWTO:\n\n"
4060         "# echo 0 > tracing_on : quick way to disable tracing\n"
4061         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4062         " Important files:\n"
4063         "  trace\t\t\t- The static contents of the buffer\n"
4064         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4065         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4066         "  current_tracer\t- function and latency tracers\n"
4067         "  available_tracers\t- list of configured tracers for current_tracer\n"
4068         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4069         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4070         "  trace_clock\t\t- change the clock used to order events\n"
4071         "       local:   Per cpu clock but may not be synced across CPUs\n"
4072         "      global:   Synced across CPUs but slows tracing down.\n"
4073         "     counter:   Not a clock, but just an increment\n"
4074         "      uptime:   Jiffy counter from time of boot\n"
4075         "        perf:   Same clock that perf events use\n"
4076 #ifdef CONFIG_X86_64
4077         "     x86-tsc:   TSC cycle counter\n"
4078 #endif
4079         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4080         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4081         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4082         "\t\t\t  Remove sub-buffer with rmdir\n"
4083         "  trace_options\t\t- Set format or modify how tracing happens\n"
4084         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4085         "\t\t\t  option name\n"
4086         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4087 #ifdef CONFIG_DYNAMIC_FTRACE
4088         "\n  available_filter_functions - list of functions that can be filtered on\n"
4089         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4090         "\t\t\t  functions\n"
4091         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4092         "\t     modules: Can select a group via module\n"
4093         "\t      Format: :mod:<module-name>\n"
4094         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4095         "\t    triggers: a command to perform when function is hit\n"
4096         "\t      Format: <function>:<trigger>[:count]\n"
4097         "\t     trigger: traceon, traceoff\n"
4098         "\t\t      enable_event:<system>:<event>\n"
4099         "\t\t      disable_event:<system>:<event>\n"
4100 #ifdef CONFIG_STACKTRACE
4101         "\t\t      stacktrace\n"
4102 #endif
4103 #ifdef CONFIG_TRACER_SNAPSHOT
4104         "\t\t      snapshot\n"
4105 #endif
4106         "\t\t      dump\n"
4107         "\t\t      cpudump\n"
4108         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4109         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4110         "\t     The first one will disable tracing every time do_fault is hit\n"
4111         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4112         "\t       The first time do_trap is hit and it disables tracing, the\n"
4113         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4114         "\t       the counter will not decrement. It only decrements when the\n"
4115         "\t       trigger did work\n"
4116         "\t     To remove trigger without count:\n"
4117         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4118         "\t     To remove trigger with a count:\n"
4119         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4120         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4121         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4122         "\t    modules: Can select a group via module command :mod:\n"
4123         "\t    Does not accept triggers\n"
4124 #endif /* CONFIG_DYNAMIC_FTRACE */
4125 #ifdef CONFIG_FUNCTION_TRACER
4126         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4127         "\t\t    (function)\n"
4128 #endif
4129 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4130         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4131         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4132         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4133 #endif
4134 #ifdef CONFIG_TRACER_SNAPSHOT
4135         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4136         "\t\t\t  snapshot buffer. Read the contents for more\n"
4137         "\t\t\t  information\n"
4138 #endif
4139 #ifdef CONFIG_STACK_TRACER
4140         "  stack_trace\t\t- Shows the max stack trace when active\n"
4141         "  stack_max_size\t- Shows current max stack size that was traced\n"
4142         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4143         "\t\t\t  new trace)\n"
4144 #ifdef CONFIG_DYNAMIC_FTRACE
4145         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4146         "\t\t\t  traces\n"
4147 #endif
4148 #endif /* CONFIG_STACK_TRACER */
4149 #ifdef CONFIG_KPROBE_EVENT
4150         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4151         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4152 #endif
4153 #ifdef CONFIG_UPROBE_EVENT
4154         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4155         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4156 #endif
4157 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4158         "\t  accepts: event-definitions (one definition per line)\n"
4159         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4160         "\t           -:[<group>/]<event>\n"
4161 #ifdef CONFIG_KPROBE_EVENT
4162         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4163 #endif
4164 #ifdef CONFIG_UPROBE_EVENT
4165         "\t    place: <path>:<offset>\n"
4166 #endif
4167         "\t     args: <name>=fetcharg[:type]\n"
4168         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4169         "\t           $stack<index>, $stack, $retval, $comm\n"
4170         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4171         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4172 #endif
4173         "  events/\t\t- Directory containing all trace event subsystems:\n"
4174         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4175         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4176         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4177         "\t\t\t  events\n"
4178         "      filter\t\t- If set, only events passing filter are traced\n"
4179         "  events/<system>/<event>/\t- Directory containing control files for\n"
4180         "\t\t\t  <event>:\n"
4181         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4182         "      filter\t\t- If set, only events passing filter are traced\n"
4183         "      trigger\t\t- If set, a command to perform when event is hit\n"
4184         "\t    Format: <trigger>[:count][if <filter>]\n"
4185         "\t   trigger: traceon, traceoff\n"
4186         "\t            enable_event:<system>:<event>\n"
4187         "\t            disable_event:<system>:<event>\n"
4188 #ifdef CONFIG_HIST_TRIGGERS
4189         "\t            enable_hist:<system>:<event>\n"
4190         "\t            disable_hist:<system>:<event>\n"
4191 #endif
4192 #ifdef CONFIG_STACKTRACE
4193         "\t\t    stacktrace\n"
4194 #endif
4195 #ifdef CONFIG_TRACER_SNAPSHOT
4196         "\t\t    snapshot\n"
4197 #endif
4198 #ifdef CONFIG_HIST_TRIGGERS
4199         "\t\t    hist (see below)\n"
4200 #endif
4201         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4202         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4203         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4204         "\t                  events/block/block_unplug/trigger\n"
4205         "\t   The first disables tracing every time block_unplug is hit.\n"
4206         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4207         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4208         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4209         "\t   Like function triggers, the counter is only decremented if it\n"
4210         "\t    enabled or disabled tracing.\n"
4211         "\t   To remove a trigger without a count:\n"
4212         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4213         "\t   To remove a trigger with a count:\n"
4214         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4215         "\t   Filters can be ignored when removing a trigger.\n"
4216 #ifdef CONFIG_HIST_TRIGGERS
4217         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4218         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4219         "\t            [:values=<field1[,field2,...]>]\n"
4220         "\t            [:sort=<field1[,field2,...]>]\n"
4221         "\t            [:size=#entries]\n"
4222         "\t            [:pause][:continue][:clear]\n"
4223         "\t            [:name=histname1]\n"
4224         "\t            [if <filter>]\n\n"
4225         "\t    When a matching event is hit, an entry is added to a hash\n"
4226         "\t    table using the key(s) and value(s) named, and the value of a\n"
4227         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4228         "\t    correspond to fields in the event's format description.  Keys\n"
4229         "\t    can be any field, or the special string 'stacktrace'.\n"
4230         "\t    Compound keys consisting of up to two fields can be specified\n"
4231         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4232         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4233         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4234         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4235         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4236         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4237         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4238         "\t    its histogram data will be shared with other triggers of the\n"
4239         "\t    same name, and trigger hits will update this common data.\n\n"
4240         "\t    Reading the 'hist' file for the event will dump the hash\n"
4241         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4242         "\t    triggers attached to an event, there will be a table for each\n"
4243         "\t    trigger in the output.  The table displayed for a named\n"
4244         "\t    trigger will be the same as any other instance having the\n"
4245         "\t    same name.  The default format used to display a given field\n"
4246         "\t    can be modified by appending any of the following modifiers\n"
4247         "\t    to the field name, as applicable:\n\n"
4248         "\t            .hex        display a number as a hex value\n"
4249         "\t            .sym        display an address as a symbol\n"
4250         "\t            .sym-offset display an address as a symbol and offset\n"
4251         "\t            .execname   display a common_pid as a program name\n"
4252         "\t            .syscall    display a syscall id as a syscall name\n"
4253         "\t            .log2       display log2 value rather than raw number\n\n"
4254         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4255         "\t    trigger or to start a hist trigger but not log any events\n"
4256         "\t    until told to do so.  'continue' can be used to start or\n"
4257         "\t    restart a paused hist trigger.\n\n"
4258         "\t    The 'clear' parameter will clear the contents of a running\n"
4259         "\t    hist trigger and leave its current paused/active state\n"
4260         "\t    unchanged.\n\n"
4261         "\t    The enable_hist and disable_hist triggers can be used to\n"
4262         "\t    have one event conditionally start and stop another event's\n"
4263         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4264         "\t    the enable_event and disable_event triggers.\n"
4265 #endif
4266 ;
4267
4268 static ssize_t
4269 tracing_readme_read(struct file *filp, char __user *ubuf,
4270                        size_t cnt, loff_t *ppos)
4271 {
4272         return simple_read_from_buffer(ubuf, cnt, ppos,
4273                                         readme_msg, strlen(readme_msg));
4274 }
4275
4276 static const struct file_operations tracing_readme_fops = {
4277         .open           = tracing_open_generic,
4278         .read           = tracing_readme_read,
4279         .llseek         = generic_file_llseek,
4280 };
4281
4282 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4283 {
4284         unsigned int *ptr = v;
4285
4286         if (*pos || m->count)
4287                 ptr++;
4288
4289         (*pos)++;
4290
4291         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4292              ptr++) {
4293                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4294                         continue;
4295
4296                 return ptr;
4297         }
4298
4299         return NULL;
4300 }
4301
4302 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4303 {
4304         void *v;
4305         loff_t l = 0;
4306
4307         preempt_disable();
4308         arch_spin_lock(&trace_cmdline_lock);
4309
4310         v = &savedcmd->map_cmdline_to_pid[0];
4311         while (l <= *pos) {
4312                 v = saved_cmdlines_next(m, v, &l);
4313                 if (!v)
4314                         return NULL;
4315         }
4316
4317         return v;
4318 }
4319
4320 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4321 {
4322         arch_spin_unlock(&trace_cmdline_lock);
4323         preempt_enable();
4324 }
4325
4326 static int saved_cmdlines_show(struct seq_file *m, void *v)
4327 {
4328         char buf[TASK_COMM_LEN];
4329         unsigned int *pid = v;
4330
4331         __trace_find_cmdline(*pid, buf);
4332         seq_printf(m, "%d %s\n", *pid, buf);
4333         return 0;
4334 }
4335
4336 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4337         .start          = saved_cmdlines_start,
4338         .next           = saved_cmdlines_next,
4339         .stop           = saved_cmdlines_stop,
4340         .show           = saved_cmdlines_show,
4341 };
4342
4343 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4344 {
4345         if (tracing_disabled)
4346                 return -ENODEV;
4347
4348         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4349 }
4350
4351 static const struct file_operations tracing_saved_cmdlines_fops = {
4352         .open           = tracing_saved_cmdlines_open,
4353         .read           = seq_read,
4354         .llseek         = seq_lseek,
4355         .release        = seq_release,
4356 };
4357
4358 static ssize_t
4359 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4360                                  size_t cnt, loff_t *ppos)
4361 {
4362         char buf[64];
4363         int r;
4364
4365         arch_spin_lock(&trace_cmdline_lock);
4366         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4367         arch_spin_unlock(&trace_cmdline_lock);
4368
4369         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4370 }
4371
4372 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4373 {
4374         kfree(s->saved_cmdlines);
4375         kfree(s->map_cmdline_to_pid);
4376         kfree(s);
4377 }
4378
4379 static int tracing_resize_saved_cmdlines(unsigned int val)
4380 {
4381         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4382
4383         s = kmalloc(sizeof(*s), GFP_KERNEL);
4384         if (!s)
4385                 return -ENOMEM;
4386
4387         if (allocate_cmdlines_buffer(val, s) < 0) {
4388                 kfree(s);
4389                 return -ENOMEM;
4390         }
4391
4392         arch_spin_lock(&trace_cmdline_lock);
4393         savedcmd_temp = savedcmd;
4394         savedcmd = s;
4395         arch_spin_unlock(&trace_cmdline_lock);
4396         free_saved_cmdlines_buffer(savedcmd_temp);
4397
4398         return 0;
4399 }
4400
4401 static ssize_t
4402 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4403                                   size_t cnt, loff_t *ppos)
4404 {
4405         unsigned long val;
4406         int ret;
4407
4408         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4409         if (ret)
4410                 return ret;
4411
4412         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4413         if (!val || val > PID_MAX_DEFAULT)
4414                 return -EINVAL;
4415
4416         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4417         if (ret < 0)
4418                 return ret;
4419
4420         *ppos += cnt;
4421
4422         return cnt;
4423 }
4424
4425 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4426         .open           = tracing_open_generic,
4427         .read           = tracing_saved_cmdlines_size_read,
4428         .write          = tracing_saved_cmdlines_size_write,
4429 };
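
/*
 * Illustrative usage (editor's addition): saved_cmdlines_size reads and
 * resizes the pid -> comm cache dumped by saved_cmdlines, e.g.
 *
 *	# cat saved_cmdlines_size
 *	128
 *	# echo 1024 > saved_cmdlines_size
 *
 * Writes must be between 1 and PID_MAX_DEFAULT; the resize allocates a
 * new buffer and swaps it in under trace_cmdline_lock.
 */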
4430
4431 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4432 static union trace_enum_map_item *
4433 update_enum_map(union trace_enum_map_item *ptr)
4434 {
4435         if (!ptr->map.enum_string) {
4436                 if (ptr->tail.next) {
4437                         ptr = ptr->tail.next;
4438                         /* Set ptr to the next real item (skip head) */
4439                         ptr++;
4440                 } else
4441                         return NULL;
4442         }
4443         return ptr;
4444 }
4445
4446 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4447 {
4448         union trace_enum_map_item *ptr = v;
4449
4450         /*
4451          * Paranoid! If ptr points to end, we don't want to increment past it.
4452          * This really should never happen.
4453          */
4454         ptr = update_enum_map(ptr);
4455         if (WARN_ON_ONCE(!ptr))
4456                 return NULL;
4457
4458         ptr++;
4459
4460         (*pos)++;
4461
4462         ptr = update_enum_map(ptr);
4463
4464         return ptr;
4465 }
4466
4467 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4468 {
4469         union trace_enum_map_item *v;
4470         loff_t l = 0;
4471
4472         mutex_lock(&trace_enum_mutex);
4473
4474         v = trace_enum_maps;
4475         if (v)
4476                 v++;
4477
4478         while (v && l < *pos) {
4479                 v = enum_map_next(m, v, &l);
4480         }
4481
4482         return v;
4483 }
4484
4485 static void enum_map_stop(struct seq_file *m, void *v)
4486 {
4487         mutex_unlock(&trace_enum_mutex);
4488 }
4489
4490 static int enum_map_show(struct seq_file *m, void *v)
4491 {
4492         union trace_enum_map_item *ptr = v;
4493
4494         seq_printf(m, "%s %ld (%s)\n",
4495                    ptr->map.enum_string, ptr->map.enum_value,
4496                    ptr->map.system);
4497
4498         return 0;
4499 }
4500
4501 static const struct seq_operations tracing_enum_map_seq_ops = {
4502         .start          = enum_map_start,
4503         .next           = enum_map_next,
4504         .stop           = enum_map_stop,
4505         .show           = enum_map_show,
4506 };
4507
4508 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4509 {
4510         if (tracing_disabled)
4511                 return -ENODEV;
4512
4513         return seq_open(filp, &tracing_enum_map_seq_ops);
4514 }
4515
4516 static const struct file_operations tracing_enum_map_fops = {
4517         .open           = tracing_enum_map_open,
4518         .read           = seq_read,
4519         .llseek         = seq_lseek,
4520         .release        = seq_release,
4521 };
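
/*
 * Illustrative output (editor's addition): each line of the enum_map file
 * follows the "%s %ld (%s)" format used by enum_map_show(), i.e.
 *
 *	# cat enum_map
 *	SOME_ENUM_NAME 3 (some_subsystem)
 *
 * where the name/value pairs come from TRACE_DEFINE_ENUM() users and the
 * trailing string is the owning event system (the values shown here are
 * made up).
 */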
4522
4523 static inline union trace_enum_map_item *
4524 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4525 {
4526         /* Return tail of array given the head */
4527         return ptr + ptr->head.length + 1;
4528 }
4529
4530 static void
4531 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4532                            int len)
4533 {
4534         struct trace_enum_map **stop;
4535         struct trace_enum_map **map;
4536         union trace_enum_map_item *map_array;
4537         union trace_enum_map_item *ptr;
4538
4539         stop = start + len;
4540
4541         /*
4542          * The trace_enum_maps contains the map plus a head and tail item,
4543          * where the head holds the module and length of array, and the
4544          * tail holds a pointer to the next list.
4545          */
4546         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4547         if (!map_array) {
4548                 pr_warn("Unable to allocate trace enum mapping\n");
4549                 return;
4550         }
4551
4552         mutex_lock(&trace_enum_mutex);
4553
4554         if (!trace_enum_maps)
4555                 trace_enum_maps = map_array;
4556         else {
4557                 ptr = trace_enum_maps;
4558                 for (;;) {
4559                         ptr = trace_enum_jmp_to_tail(ptr);
4560                         if (!ptr->tail.next)
4561                                 break;
4562                         ptr = ptr->tail.next;
4563
4564                 }
4565                 ptr->tail.next = map_array;
4566         }
4567         map_array->head.mod = mod;
4568         map_array->head.length = len;
4569         map_array++;
4570
4571         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4572                 map_array->map = **map;
4573                 map_array++;
4574         }
4575         memset(map_array, 0, sizeof(*map_array));
4576
4577         mutex_unlock(&trace_enum_mutex);
4578 }
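
/*
 * Illustrative layout (editor's addition): for a module exporting three
 * enum maps, the map_array allocated above (len + 2 items) looks like:
 *
 *	[0] head  (.mod, .length = 3)
 *	[1] map   (enum_string, enum_value, system)
 *	[2] map
 *	[3] map
 *	[4] tail  (zeroed; .tail.next later links the next module's array)
 *
 * trace_enum_jmp_to_tail() hops from the head straight to that last item.
 */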
4579
4580 static void trace_create_enum_file(struct dentry *d_tracer)
4581 {
4582         trace_create_file("enum_map", 0444, d_tracer,
4583                           NULL, &tracing_enum_map_fops);
4584 }
4585
4586 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4587 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4588 static inline void trace_insert_enum_map_file(struct module *mod,
4589                               struct trace_enum_map **start, int len) { }
4590 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4591
4592 static void trace_insert_enum_map(struct module *mod,
4593                                   struct trace_enum_map **start, int len)
4594 {
4595         struct trace_enum_map **map;
4596
4597         if (len <= 0)
4598                 return;
4599
4600         map = start;
4601
4602         trace_event_enum_update(map, len);
4603
4604         trace_insert_enum_map_file(mod, start, len);
4605 }
4606
4607 static ssize_t
4608 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4609                        size_t cnt, loff_t *ppos)
4610 {
4611         struct trace_array *tr = filp->private_data;
4612         char buf[MAX_TRACER_SIZE+2];
4613         int r;
4614
4615         mutex_lock(&trace_types_lock);
4616         r = sprintf(buf, "%s\n", tr->current_trace->name);
4617         mutex_unlock(&trace_types_lock);
4618
4619         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4620 }
4621
4622 int tracer_init(struct tracer *t, struct trace_array *tr)
4623 {
4624         tracing_reset_online_cpus(&tr->trace_buffer);
4625         return t->init(tr);
4626 }
4627
4628 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4629 {
4630         int cpu;
4631
4632         for_each_tracing_cpu(cpu)
4633                 per_cpu_ptr(buf->data, cpu)->entries = val;
4634 }
4635
4636 #ifdef CONFIG_TRACER_MAX_TRACE
4637 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4638 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4639                                         struct trace_buffer *size_buf, int cpu_id)
4640 {
4641         int cpu, ret = 0;
4642
4643         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4644                 for_each_tracing_cpu(cpu) {
4645                         ret = ring_buffer_resize(trace_buf->buffer,
4646                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4647                         if (ret < 0)
4648                                 break;
4649                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4650                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4651                 }
4652         } else {
4653                 ret = ring_buffer_resize(trace_buf->buffer,
4654                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4655                 if (ret == 0)
4656                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4657                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4658         }
4659
4660         return ret;
4661 }
4662 #endif /* CONFIG_TRACER_MAX_TRACE */
4663
4664 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4665                                         unsigned long size, int cpu)
4666 {
4667         int ret;
4668
4669         /*
4670          * If kernel or user changes the size of the ring buffer
4671          * we use the size that was given, and we can forget about
4672          * expanding it later.
4673          */
4674         ring_buffer_expanded = true;
4675
4676         /* May be called before buffers are initialized */
4677         if (!tr->trace_buffer.buffer)
4678                 return 0;
4679
4680         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4681         if (ret < 0)
4682                 return ret;
4683
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4686             !tr->current_trace->use_max_tr)
4687                 goto out;
4688
4689         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4690         if (ret < 0) {
4691                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4692                                                      &tr->trace_buffer, cpu);
4693                 if (r < 0) {
4694                         /*
4695                          * AARGH! We are left with a max buffer of a
4696                          * different size!!!!
4697                          * The max buffer is our "snapshot" buffer.
4698                          * When a tracer needs a snapshot (one of the
4699                          * latency tracers), it swaps the max buffer
4700                          * with the saved snapshot. We succeeded in
4701                          * updating the size of the main buffer, but
4702                          * failed to update the size of the max buffer.
4703                          * And when we tried to reset the main buffer to
4704                          * its original size, we failed there too. This
4705                          * is very unlikely to happen, but if it does,
4706                          * warn and kill all tracing.
4707                          */
4708                         WARN_ON(1);
4709                         tracing_disabled = 1;
4710                 }
4711                 return ret;
4712         }
4713
4714         if (cpu == RING_BUFFER_ALL_CPUS)
4715                 set_buffer_entries(&tr->max_buffer, size);
4716         else
4717                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4718
4719  out:
4720 #endif /* CONFIG_TRACER_MAX_TRACE */
4721
4722         if (cpu == RING_BUFFER_ALL_CPUS)
4723                 set_buffer_entries(&tr->trace_buffer, size);
4724         else
4725                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4726
4727         return ret;
4728 }
4729
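     /*
      * Locked wrapper around __tracing_resize_ring_buffer(): checks that
      * @cpu_id is part of tracing_buffer_mask and converts a resize
      * failure into -ENOMEM.
      */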
4730 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4731                                           unsigned long size, int cpu_id)
4732 {
4733         int ret = size;
4734
4735         mutex_lock(&trace_types_lock);
4736
4737         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4738                 /* make sure this cpu is enabled in the mask */
4739                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4740                         ret = -EINVAL;
4741                         goto out;
4742                 }
4743         }
4744
4745         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4746         if (ret < 0)
4747                 ret = -ENOMEM;
4748
4749 out:
4750         mutex_unlock(&trace_types_lock);
4751
4752         return ret;
4753 }
4754
4755
4756 /**
4757  * tracing_update_buffers - used by the tracing facility to expand ring buffers
4758  *
4759  * To save memory when tracing is never used on a system that has it
4760  * configured in, the ring buffers are set to a minimum size. Once
4761  * a user starts to use the tracing facility, they need to grow
4762  * to their default size.
4763  *
4764  * This function is to be called when a tracer is about to be used.
4765  */
4766 int tracing_update_buffers(void)
4767 {
4768         int ret = 0;
4769
4770         mutex_lock(&trace_types_lock);
4771         if (!ring_buffer_expanded)
4772                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4773                                                 RING_BUFFER_ALL_CPUS);
4774         mutex_unlock(&trace_types_lock);
4775
4776         return ret;
4777 }
4778
4779 struct trace_option_dentry;
4780
4781 static void
4782 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4783
4784 /*
4785  * Used to clear out the tracer before deletion of an instance.
4786  * The caller must hold trace_types_lock.
4787  */
4788 static void tracing_set_nop(struct trace_array *tr)
4789 {
4790         if (tr->current_trace == &nop_trace)
4791                 return;
4792
4793         tr->current_trace->enabled--;
4794
4795         if (tr->current_trace->reset)
4796                 tr->current_trace->reset(tr);
4797
4798         tr->current_trace = &nop_trace;
4799 }
4800
4801 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4802 {
4803         /* Only enable if the directory has been created already. */
4804         if (!tr->dir)
4805                 return;
4806
4807         create_trace_option_files(tr, t);
4808 }
4809
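     /*
      * Switch @tr to the tracer named @buf: expand the ring buffer if
      * needed, tear down the current tracer, adjust the snapshot buffer
      * allocation to match the new tracer, and call its init callback.
      * Fails with -EBUSY while pipe readers hold a reference to the
      * current tracer.
      */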
4810 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4811 {
4812         struct tracer *t;
4813 #ifdef CONFIG_TRACER_MAX_TRACE
4814         bool had_max_tr;
4815 #endif
4816         int ret = 0;
4817
4818         mutex_lock(&trace_types_lock);
4819
4820         if (!ring_buffer_expanded) {
4821                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4822                                                 RING_BUFFER_ALL_CPUS);
4823                 if (ret < 0)
4824                         goto out;
4825                 ret = 0;
4826         }
4827
4828         for (t = trace_types; t; t = t->next) {
4829                 if (strcmp(t->name, buf) == 0)
4830                         break;
4831         }
4832         if (!t) {
4833                 ret = -EINVAL;
4834                 goto out;
4835         }
4836         if (t == tr->current_trace)
4837                 goto out;
4838
4839         /* Some tracers are only allowed for the top level buffer */
4840         if (!trace_ok_for_array(t, tr)) {
4841                 ret = -EINVAL;
4842                 goto out;
4843         }
4844
4845         /* If trace pipe files are being read, we can't change the tracer */
4846         if (tr->current_trace->ref) {
4847                 ret = -EBUSY;
4848                 goto out;
4849         }
4850
4851         trace_branch_disable();
4852
4853         tr->current_trace->enabled--;
4854
4855         if (tr->current_trace->reset)
4856                 tr->current_trace->reset(tr);
4857
4858         /* Current trace needs to be nop_trace before synchronize_sched */
4859         tr->current_trace = &nop_trace;
4860
4861 #ifdef CONFIG_TRACER_MAX_TRACE
4862         had_max_tr = tr->allocated_snapshot;
4863
4864         if (had_max_tr && !t->use_max_tr) {
4865                 /*
4866                  * We need to make sure that update_max_tr sees that
4867                  * current_trace changed to nop_trace to keep it from
4868                  * swapping the buffers after we resize it.
4869                  * update_max_tr is called with interrupts disabled,
4870                  * so a synchronize_sched() is sufficient.
4871                  */
4872                 synchronize_sched();
4873                 free_snapshot(tr);
4874         }
4875 #endif
4876
4877 #ifdef CONFIG_TRACER_MAX_TRACE
4878         if (t->use_max_tr && !had_max_tr) {
4879                 ret = alloc_snapshot(tr);
4880                 if (ret < 0)
4881                         goto out;
4882         }
4883 #endif
4884
4885         if (t->init) {
4886                 ret = tracer_init(t, tr);
4887                 if (ret)
4888                         goto out;
4889         }
4890
4891         tr->current_trace = t;
4892         tr->current_trace->enabled++;
4893         trace_branch_enable(tr);
4894  out:
4895         mutex_unlock(&trace_types_lock);
4896
4897         return ret;
4898 }
4899
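     /* Write handler for the "current_tracer" tracefs file. */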
4900 static ssize_t
4901 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4902                         size_t cnt, loff_t *ppos)
4903 {
4904         struct trace_array *tr = filp->private_data;
4905         char buf[MAX_TRACER_SIZE+1];
4906         int i;
4907         size_t ret;
4908         int err;
4909
4910         ret = cnt;
4911
4912         if (cnt > MAX_TRACER_SIZE)
4913                 cnt = MAX_TRACER_SIZE;
4914
4915         if (copy_from_user(buf, ubuf, cnt))
4916                 return -EFAULT;
4917
4918         buf[cnt] = 0;
4919
4920         /* strip trailing whitespace. */
4921         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4922                 buf[i] = 0;
4923
4924         err = tracing_set_tracer(tr, buf);
4925         if (err)
4926                 return err;
4927
4928         *ppos += ret;
4929
4930         return ret;
4931 }
4932
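     /*
      * Helpers for latency-style files: the value is kept in nanoseconds
      * internally but read and written in microseconds (a stored value of
      * -1 is shown as -1).
      */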
4933 static ssize_t
4934 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4935                    size_t cnt, loff_t *ppos)
4936 {
4937         char buf[64];
4938         int r;
4939
4940         r = snprintf(buf, sizeof(buf), "%ld\n",
4941                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4942         if (r > sizeof(buf))
4943                 r = sizeof(buf);
4944         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4945 }
4946
4947 static ssize_t
4948 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4949                     size_t cnt, loff_t *ppos)
4950 {
4951         unsigned long val;
4952         int ret;
4953
4954         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4955         if (ret)
4956                 return ret;
4957
4958         *ptr = val * 1000;
4959
4960         return cnt;
4961 }
4962
4963 static ssize_t
4964 tracing_thresh_read(struct file *filp, char __user *ubuf,
4965                     size_t cnt, loff_t *ppos)
4966 {
4967         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4968 }
4969
4970 static ssize_t
4971 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4972                      size_t cnt, loff_t *ppos)
4973 {
4974         struct trace_array *tr = filp->private_data;
4975         int ret;
4976
4977         mutex_lock(&trace_types_lock);
4978         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4979         if (ret < 0)
4980                 goto out;
4981
4982         if (tr->current_trace->update_thresh) {
4983                 ret = tr->current_trace->update_thresh(tr);
4984                 if (ret < 0)
4985                         goto out;
4986         }
4987
4988         ret = cnt;
4989 out:
4990         mutex_unlock(&trace_types_lock);
4991
4992         return ret;
4993 }
4994
4995 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4996
4997 static ssize_t
4998 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4999                      size_t cnt, loff_t *ppos)
5000 {
5001         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5002 }
5003
5004 static ssize_t
5005 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5006                       size_t cnt, loff_t *ppos)
5007 {
5008         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5009 }
5010
5011 #endif
5012
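     /*
      * Open handler for the "trace_pipe" file: allocates a consuming
      * trace_iterator for this instance and pins the current tracer by
      * bumping its ref count.
      */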
5013 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5014 {
5015         struct trace_array *tr = inode->i_private;
5016         struct trace_iterator *iter;
5017         int ret = 0;
5018
5019         if (tracing_disabled)
5020                 return -ENODEV;
5021
5022         if (trace_array_get(tr) < 0)
5023                 return -ENODEV;
5024
5025         mutex_lock(&trace_types_lock);
5026
5027         /* create a buffer to store the information to pass to userspace */
5028         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5029         if (!iter) {
5030                 ret = -ENOMEM;
5031                 __trace_array_put(tr);
5032                 goto out;
5033         }
5034
5035         trace_seq_init(&iter->seq);
5036         iter->trace = tr->current_trace;
5037
5038         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5039                 ret = -ENOMEM;
5040                 goto fail;
5041         }
5042
5043         /* trace pipe does not show start of buffer */
5044         cpumask_setall(iter->started);
5045
5046         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5047                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5048
5049         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5050         if (trace_clocks[tr->clock_id].in_ns)
5051                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5052
5053         iter->tr = tr;
5054         iter->trace_buffer = &tr->trace_buffer;
5055         iter->cpu_file = tracing_get_cpu(inode);
5056         mutex_init(&iter->mutex);
5057         filp->private_data = iter;
5058
5059         if (iter->trace->pipe_open)
5060                 iter->trace->pipe_open(iter);
5061
5062         nonseekable_open(inode, filp);
5063
5064         tr->current_trace->ref++;
5065 out:
5066         mutex_unlock(&trace_types_lock);
5067         return ret;
5068
5069 fail:
5070         kfree(iter);
5071         __trace_array_put(tr);
5072         mutex_unlock(&trace_types_lock);
5073         return ret;
5074 }
5075
5076 static int tracing_release_pipe(struct inode *inode, struct file *file)
5077 {
5078         struct trace_iterator *iter = file->private_data;
5079         struct trace_array *tr = inode->i_private;
5080
5081         mutex_lock(&trace_types_lock);
5082
5083         tr->current_trace->ref--;
5084
5085         if (iter->trace->pipe_close)
5086                 iter->trace->pipe_close(iter);
5087
5088         mutex_unlock(&trace_types_lock);
5089
5090         free_cpumask_var(iter->started);
5091         mutex_destroy(&iter->mutex);
5092         kfree(iter);
5093
5094         trace_array_put(tr);
5095
5096         return 0;
5097 }
5098
5099 static unsigned int
5100 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5101 {
5102         struct trace_array *tr = iter->tr;
5103
5104         /* Iterators are static, they should be filled or empty */
5105         if (trace_buffer_iter(iter, iter->cpu_file))
5106                 return POLLIN | POLLRDNORM;
5107
5108         if (tr->trace_flags & TRACE_ITER_BLOCK)
5109                 /*
5110                  * Always select as readable when in blocking mode
5111                  */
5112                 return POLLIN | POLLRDNORM;
5113         else
5114                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5115                                              filp, poll_table);
5116 }
5117
5118 static unsigned int
5119 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5120 {
5121         struct trace_iterator *iter = filp->private_data;
5122
5123         return trace_poll(iter, filp, poll_table);
5124 }
5125
5126 /* Must be called with iter->mutex held. */
5127 static int tracing_wait_pipe(struct file *filp)
5128 {
5129         struct trace_iterator *iter = filp->private_data;
5130         int ret;
5131
5132         while (trace_empty(iter)) {
5133
5134                 if ((filp->f_flags & O_NONBLOCK)) {
5135                         return -EAGAIN;
5136                 }
5137
5138                 /*
5139                  * We block until we read something and tracing is disabled.
5140                  * We still block if tracing is disabled but we have never
5141                  * read anything. This allows a user to cat this file and
5142                  * then enable tracing. But after we have read something,
5143                  * we give an EOF when tracing is disabled again.
5144                  *
5145                  * iter->pos will be 0 if we haven't read anything.
5146                  */
5147                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5148                         break;
5149
5150                 mutex_unlock(&iter->mutex);
5151
5152                 ret = wait_on_pipe(iter, false);
5153
5154                 mutex_lock(&iter->mutex);
5155
5156                 if (ret)
5157                         return ret;
5158         }
5159
5160         return 1;
5161 }
5162
5163 /*
5164  * Consumer reader.
5165  */
5166 static ssize_t
5167 tracing_read_pipe(struct file *filp, char __user *ubuf,
5168                   size_t cnt, loff_t *ppos)
5169 {
5170         struct trace_iterator *iter = filp->private_data;
5171         ssize_t sret;
5172
5173         /*
5174          * Avoid more than one consumer on a single file descriptor.
5175          * This is just a matter of trace coherency; the ring buffer itself
5176          * is protected.
5177          */
5178         mutex_lock(&iter->mutex);
5179
5180         /* return any leftover data */
5181         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5182         if (sret != -EBUSY)
5183                 goto out;
5184
5185         trace_seq_init(&iter->seq);
5186
5187         if (iter->trace->read) {
5188                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5189                 if (sret)
5190                         goto out;
5191         }
5192
5193 waitagain:
5194         sret = tracing_wait_pipe(filp);
5195         if (sret <= 0)
5196                 goto out;
5197
5198         /* stop when tracing is finished */
5199         if (trace_empty(iter)) {
5200                 sret = 0;
5201                 goto out;
5202         }
5203
5204         if (cnt >= PAGE_SIZE)
5205                 cnt = PAGE_SIZE - 1;
5206
5207         /* reset all but tr, trace, and overruns */
5208         memset(&iter->seq, 0,
5209                sizeof(struct trace_iterator) -
5210                offsetof(struct trace_iterator, seq));
5211         cpumask_clear(iter->started);
5212         trace_seq_init(&iter->seq);
5213         iter->pos = -1;
5214
5215         trace_event_read_lock();
5216         trace_access_lock(iter->cpu_file);
5217         while (trace_find_next_entry_inc(iter) != NULL) {
5218                 enum print_line_t ret;
5219                 int save_len = iter->seq.seq.len;
5220
5221                 ret = print_trace_line(iter);
5222                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5223                         /* don't print partial lines */
5224                         iter->seq.seq.len = save_len;
5225                         break;
5226                 }
5227                 if (ret != TRACE_TYPE_NO_CONSUME)
5228                         trace_consume(iter);
5229
5230                 if (trace_seq_used(&iter->seq) >= cnt)
5231                         break;
5232
5233                 /*
5234                  * Setting the full flag means we reached the trace_seq buffer
5235                  * size and should have left via the partial-output condition
5236                  * above. One of the trace_seq_* functions is not used properly.
5237                  */
5238                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5239                           iter->ent->type);
5240         }
5241         trace_access_unlock(iter->cpu_file);
5242         trace_event_read_unlock();
5243
5244         /* Now copy what we have to the user */
5245         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5246         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5247                 trace_seq_init(&iter->seq);
5248
5249         /*
5250          * If there was nothing to send to the user, despite consuming trace
5251          * entries, go back to wait for more entries.
5252          */
5253         if (sret == -EBUSY)
5254                 goto waitagain;
5255
5256 out:
5257         mutex_unlock(&iter->mutex);
5258
5259         return sret;
5260 }
5261
5262 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5263                                      unsigned int idx)
5264 {
5265         __free_page(spd->pages[idx]);
5266 }
5267
5268 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5269         .can_merge              = 0,
5270         .confirm                = generic_pipe_buf_confirm,
5271         .release                = generic_pipe_buf_release,
5272         .steal                  = generic_pipe_buf_steal,
5273         .get                    = generic_pipe_buf_get,
5274 };
5275
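     /*
      * Fill iter->seq with as many formatted trace lines as fit in @rem
      * bytes, consuming entries as they are printed. Returns the remaining
      * byte budget (@rem minus what was used).
      */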
5276 static size_t
5277 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5278 {
5279         size_t count;
5280         int save_len;
5281         int ret;
5282
5283         /* Seq buffer is page-sized, exactly what we need. */
5284         for (;;) {
5285                 save_len = iter->seq.seq.len;
5286                 ret = print_trace_line(iter);
5287
5288                 if (trace_seq_has_overflowed(&iter->seq)) {
5289                         iter->seq.seq.len = save_len;
5290                         break;
5291                 }
5292
5293                 /*
5294                  * This should not be hit, because a partial line should only
5295                  * be returned if iter->seq overflowed. But check it
5296                  * anyway to be safe.
5297                  */
5298                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5299                         iter->seq.seq.len = save_len;
5300                         break;
5301                 }
5302
5303                 count = trace_seq_used(&iter->seq) - save_len;
5304                 if (rem < count) {
5305                         rem = 0;
5306                         iter->seq.seq.len = save_len;
5307                         break;
5308                 }
5309
5310                 if (ret != TRACE_TYPE_NO_CONSUME)
5311                         trace_consume(iter);
5312                 rem -= count;
5313                 if (!trace_find_next_entry_inc(iter))   {
5314                         rem = 0;
5315                         iter->ent = NULL;
5316                         break;
5317                 }
5318         }
5319
5320         return rem;
5321 }
5322
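     /*
      * splice_read handler for "trace_pipe": formats consumed trace
      * entries into freshly allocated pages and feeds them to the pipe.
      */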
5323 static ssize_t tracing_splice_read_pipe(struct file *filp,
5324                                         loff_t *ppos,
5325                                         struct pipe_inode_info *pipe,
5326                                         size_t len,
5327                                         unsigned int flags)
5328 {
5329         struct page *pages_def[PIPE_DEF_BUFFERS];
5330         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5331         struct trace_iterator *iter = filp->private_data;
5332         struct splice_pipe_desc spd = {
5333                 .pages          = pages_def,
5334                 .partial        = partial_def,
5335                 .nr_pages       = 0, /* This gets updated below. */
5336                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5337                 .flags          = flags,
5338                 .ops            = &tracing_pipe_buf_ops,
5339                 .spd_release    = tracing_spd_release_pipe,
5340         };
5341         ssize_t ret;
5342         size_t rem;
5343         unsigned int i;
5344
5345         if (splice_grow_spd(pipe, &spd))
5346                 return -ENOMEM;
5347
5348         mutex_lock(&iter->mutex);
5349
5350         if (iter->trace->splice_read) {
5351                 ret = iter->trace->splice_read(iter, filp,
5352                                                ppos, pipe, len, flags);
5353                 if (ret)
5354                         goto out_err;
5355         }
5356
5357         ret = tracing_wait_pipe(filp);
5358         if (ret <= 0)
5359                 goto out_err;
5360
5361         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5362                 ret = -EFAULT;
5363                 goto out_err;
5364         }
5365
5366         trace_event_read_lock();
5367         trace_access_lock(iter->cpu_file);
5368
5369         /* Fill as many pages as possible. */
5370         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5371                 spd.pages[i] = alloc_page(GFP_KERNEL);
5372                 if (!spd.pages[i])
5373                         break;
5374
5375                 rem = tracing_fill_pipe_page(rem, iter);
5376
5377                 /* Copy the data into the page, so we can start over. */
5378                 ret = trace_seq_to_buffer(&iter->seq,
5379                                           page_address(spd.pages[i]),
5380                                           trace_seq_used(&iter->seq));
5381                 if (ret < 0) {
5382                         __free_page(spd.pages[i]);
5383                         break;
5384                 }
5385                 spd.partial[i].offset = 0;
5386                 spd.partial[i].len = trace_seq_used(&iter->seq);
5387
5388                 trace_seq_init(&iter->seq);
5389         }
5390
5391         trace_access_unlock(iter->cpu_file);
5392         trace_event_read_unlock();
5393         mutex_unlock(&iter->mutex);
5394
5395         spd.nr_pages = i;
5396
5397         if (i)
5398                 ret = splice_to_pipe(pipe, &spd);
5399         else
5400                 ret = 0;
5401 out:
5402         splice_shrink_spd(&spd);
5403         return ret;
5404
5405 out_err:
5406         mutex_unlock(&iter->mutex);
5407         goto out;
5408 }
5409
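     /*
      * Read handler for "buffer_size_kb": reports the per-cpu ring buffer
      * size in KB, or "X" when the CPUs are sized differently.
      */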
5410 static ssize_t
5411 tracing_entries_read(struct file *filp, char __user *ubuf,
5412                      size_t cnt, loff_t *ppos)
5413 {
5414         struct inode *inode = file_inode(filp);
5415         struct trace_array *tr = inode->i_private;
5416         int cpu = tracing_get_cpu(inode);
5417         char buf[64];
5418         int r = 0;
5419         ssize_t ret;
5420
5421         mutex_lock(&trace_types_lock);
5422
5423         if (cpu == RING_BUFFER_ALL_CPUS) {
5424                 int cpu, buf_size_same;
5425                 unsigned long size;
5426
5427                 size = 0;
5428                 buf_size_same = 1;
5429                 /* check if all cpu sizes are same */
5430                 for_each_tracing_cpu(cpu) {
5431                         /* fill in the size from first enabled cpu */
5432                         if (size == 0)
5433                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5434                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5435                                 buf_size_same = 0;
5436                                 break;
5437                         }
5438                 }
5439
5440                 if (buf_size_same) {
5441                         if (!ring_buffer_expanded)
5442                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5443                                             size >> 10,
5444                                             trace_buf_size >> 10);
5445                         else
5446                                 r = sprintf(buf, "%lu\n", size >> 10);
5447                 } else
5448                         r = sprintf(buf, "X\n");
5449         } else
5450                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5451
5452         mutex_unlock(&trace_types_lock);
5453
5454         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5455         return ret;
5456 }
5457
5458 static ssize_t
5459 tracing_entries_write(struct file *filp, const char __user *ubuf,
5460                       size_t cnt, loff_t *ppos)
5461 {
5462         struct inode *inode = file_inode(filp);
5463         struct trace_array *tr = inode->i_private;
5464         unsigned long val;
5465         int ret;
5466
5467         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5468         if (ret)
5469                 return ret;
5470
5471         /* must have at least 1 entry */
5472         if (!val)
5473                 return -EINVAL;
5474
5475         /* value is in KB */
5476         val <<= 10;
5477         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5478         if (ret < 0)
5479                 return ret;
5480
5481         *ppos += cnt;
5482
5483         return cnt;
5484 }
5485
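     /*
      * Read handler for "buffer_total_size_kb": sums the ring buffer size
      * over all tracing CPUs, noting the expanded size if the buffers have
      * not been expanded yet.
      */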
5486 static ssize_t
5487 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5488                                 size_t cnt, loff_t *ppos)
5489 {
5490         struct trace_array *tr = filp->private_data;
5491         char buf[64];
5492         int r, cpu;
5493         unsigned long size = 0, expanded_size = 0;
5494
5495         mutex_lock(&trace_types_lock);
5496         for_each_tracing_cpu(cpu) {
5497                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5498                 if (!ring_buffer_expanded)
5499                         expanded_size += trace_buf_size >> 10;
5500         }
5501         if (ring_buffer_expanded)
5502                 r = sprintf(buf, "%lu\n", size);
5503         else
5504                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5505         mutex_unlock(&trace_types_lock);
5506
5507         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5508 }
5509
5510 static ssize_t
5511 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5512                           size_t cnt, loff_t *ppos)
5513 {
5514         /*
5515          * There is no need to read what the user has written; this function
5516          * exists just to make sure that there is no error when "echo" is used.
5517          */
5518
5519         *ppos += cnt;
5520
5521         return cnt;
5522 }
5523
5524 static int
5525 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5526 {
5527         struct trace_array *tr = inode->i_private;
5528
5529         /* disable tracing? */
5530         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5531                 tracer_tracing_off(tr);
5532         /* resize the ring buffer to 0 */
5533         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5534
5535         trace_array_put(tr);
5536
5537         return 0;
5538 }
5539
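     /*
      * Write handler for the "trace_marker" file: copies the user string
      * into the ring buffer as a TRACE_PRINT event (e.g. from a shell,
      * "echo hello > trace_marker").
      */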
5540 static ssize_t
5541 tracing_mark_write(struct file *filp, const char __user *ubuf,
5542                                         size_t cnt, loff_t *fpos)
5543 {
5544         unsigned long addr = (unsigned long)ubuf;
5545         struct trace_array *tr = filp->private_data;
5546         struct ring_buffer_event *event;
5547         struct ring_buffer *buffer;
5548         struct print_entry *entry;
5549         unsigned long irq_flags;
5550         struct page *pages[2];
5551         void *map_page[2];
5552         int nr_pages = 1;
5553         ssize_t written;
5554         int offset;
5555         int size;
5556         int len;
5557         int ret;
5558         int i;
5559
5560         if (tracing_disabled)
5561                 return -EINVAL;
5562
5563         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5564                 return -EINVAL;
5565
5566         if (cnt > TRACE_BUF_SIZE)
5567                 cnt = TRACE_BUF_SIZE;
5568
5569         /*
5570          * Userspace is injecting traces into the kernel trace buffer.
5571          * We want to be as non-intrusive as possible.
5572          * To do so, we do not want to allocate any special buffers
5573          * or take any locks, but instead write the userspace data
5574          * straight into the ring buffer.
5575          *
5576          * First we need to pin the userspace buffer into memory.
5577          * It most likely is resident already, because userspace just
5578          * referenced it, but there is no guarantee. By using
5579          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we
5580          * can access the pages directly and then write the data
5581          * straight into the ring buffer.
5582          */
5583         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5584
5585         /* check if the user buffer crosses a page boundary */
5586         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5587                 nr_pages = 2;
5588
5589         offset = addr & (PAGE_SIZE - 1);
5590         addr &= PAGE_MASK;
5591
5592         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5593         if (ret < nr_pages) {
5594                 while (--ret >= 0)
5595                         put_page(pages[ret]);
5596                 written = -EFAULT;
5597                 goto out;
5598         }
5599
5600         for (i = 0; i < nr_pages; i++)
5601                 map_page[i] = kmap_atomic(pages[i]);
5602
5603         local_save_flags(irq_flags);
5604         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5605         buffer = tr->trace_buffer.buffer;
5606         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5607                                           irq_flags, preempt_count());
5608         if (!event) {
5609                 /* Ring buffer disabled, return as if not open for write */
5610                 written = -EBADF;
5611                 goto out_unlock;
5612         }
5613
5614         entry = ring_buffer_event_data(event);
5615         entry->ip = _THIS_IP_;
5616
5617         if (nr_pages == 2) {
5618                 len = PAGE_SIZE - offset;
5619                 memcpy(&entry->buf, map_page[0] + offset, len);
5620                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5621         } else
5622                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5623
5624         if (entry->buf[cnt - 1] != '\n') {
5625                 entry->buf[cnt] = '\n';
5626                 entry->buf[cnt + 1] = '\0';
5627         } else
5628                 entry->buf[cnt] = '\0';
5629
5630         __buffer_unlock_commit(buffer, event);
5631
5632         written = cnt;
5633
5634         *fpos += written;
5635
5636  out_unlock:
5637         for (i = nr_pages - 1; i >= 0; i--) {
5638                 kunmap_atomic(map_page[i]);
5639                 put_page(pages[i]);
5640         }
5641  out:
5642         return written;
5643 }
5644
5645 static int tracing_clock_show(struct seq_file *m, void *v)
5646 {
5647         struct trace_array *tr = m->private;
5648         int i;
5649
5650         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5651                 seq_printf(m,
5652                         "%s%s%s%s", i ? " " : "",
5653                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5654                         i == tr->clock_id ? "]" : "");
5655         seq_putc(m, '\n');
5656
5657         return 0;
5658 }
5659
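     /*
      * Select the trace clock named @clockstr from trace_clocks[] and
      * reset the buffers, since timestamps from different clocks are not
      * comparable.
      */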
5660 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5661 {
5662         int i;
5663
5664         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5665                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5666                         break;
5667         }
5668         if (i == ARRAY_SIZE(trace_clocks))
5669                 return -EINVAL;
5670
5671         mutex_lock(&trace_types_lock);
5672
5673         tr->clock_id = i;
5674
5675         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5676
5677         /*
5678          * New clock may not be consistent with the previous clock.
5679          * Reset the buffer so that it doesn't have incomparable timestamps.
5680          */
5681         tracing_reset_online_cpus(&tr->trace_buffer);
5682
5683 #ifdef CONFIG_TRACER_MAX_TRACE
5684         if (tr->max_buffer.buffer)
5685                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5686         tracing_reset_online_cpus(&tr->max_buffer);
5687 #endif
5688
5689         mutex_unlock(&trace_types_lock);
5690
5691         return 0;
5692 }
5693
5694 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5695                                    size_t cnt, loff_t *fpos)
5696 {
5697         struct seq_file *m = filp->private_data;
5698         struct trace_array *tr = m->private;
5699         char buf[64];
5700         const char *clockstr;
5701         int ret;
5702
5703         if (cnt >= sizeof(buf))
5704                 return -EINVAL;
5705
5706         if (copy_from_user(buf, ubuf, cnt))
5707                 return -EFAULT;
5708
5709         buf[cnt] = 0;
5710
5711         clockstr = strstrip(buf);
5712
5713         ret = tracing_set_clock(tr, clockstr);
5714         if (ret)
5715                 return ret;
5716
5717         *fpos += cnt;
5718
5719         return cnt;
5720 }
5721
5722 static int tracing_clock_open(struct inode *inode, struct file *file)
5723 {
5724         struct trace_array *tr = inode->i_private;
5725         int ret;
5726
5727         if (tracing_disabled)
5728                 return -ENODEV;
5729
5730         if (trace_array_get(tr))
5731                 return -ENODEV;
5732
5733         ret = single_open(file, tracing_clock_show, inode->i_private);
5734         if (ret < 0)
5735                 trace_array_put(tr);
5736
5737         return ret;
5738 }
5739
5740 struct ftrace_buffer_info {
5741         struct trace_iterator   iter;
5742         void                    *spare;
5743         unsigned int            read;
5744 };
5745
5746 #ifdef CONFIG_TRACER_SNAPSHOT
5747 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5748 {
5749         struct trace_array *tr = inode->i_private;
5750         struct trace_iterator *iter;
5751         struct seq_file *m;
5752         int ret = 0;
5753
5754         if (trace_array_get(tr) < 0)
5755                 return -ENODEV;
5756
5757         if (file->f_mode & FMODE_READ) {
5758                 iter = __tracing_open(inode, file, true);
5759                 if (IS_ERR(iter))
5760                         ret = PTR_ERR(iter);
5761         } else {
5762                 /* Writes still need the seq_file to hold the private data */
5763                 ret = -ENOMEM;
5764                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5765                 if (!m)
5766                         goto out;
5767                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5768                 if (!iter) {
5769                         kfree(m);
5770                         goto out;
5771                 }
5772                 ret = 0;
5773
5774                 iter->tr = tr;
5775                 iter->trace_buffer = &tr->max_buffer;
5776                 iter->cpu_file = tracing_get_cpu(inode);
5777                 m->private = iter;
5778                 file->private_data = m;
5779         }
5780 out:
5781         if (ret < 0)
5782                 trace_array_put(tr);
5783
5784         return ret;
5785 }
5786
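     /*
      * Write handler for the "snapshot" file: "0" frees the snapshot
      * buffer, "1" allocates it (if needed) and takes a snapshot by
      * swapping with the main buffer, and any other value clears the
      * snapshot contents.
      */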
5787 static ssize_t
5788 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5789                        loff_t *ppos)
5790 {
5791         struct seq_file *m = filp->private_data;
5792         struct trace_iterator *iter = m->private;
5793         struct trace_array *tr = iter->tr;
5794         unsigned long val;
5795         int ret;
5796
5797         ret = tracing_update_buffers();
5798         if (ret < 0)
5799                 return ret;
5800
5801         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5802         if (ret)
5803                 return ret;
5804
5805         mutex_lock(&trace_types_lock);
5806
5807         if (tr->current_trace->use_max_tr) {
5808                 ret = -EBUSY;
5809                 goto out;
5810         }
5811
5812         switch (val) {
5813         case 0:
5814                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5815                         ret = -EINVAL;
5816                         break;
5817                 }
5818                 if (tr->allocated_snapshot)
5819                         free_snapshot(tr);
5820                 break;
5821         case 1:
5822 /* Only allow per-cpu swap if the ring buffer supports it */
5823 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5824                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5825                         ret = -EINVAL;
5826                         break;
5827                 }
5828 #endif
5829                 if (!tr->allocated_snapshot)
5830                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5831                                 &tr->trace_buffer, iter->cpu_file);
5832                 else
5833                         ret = alloc_snapshot(tr);
5834
5835                 if (ret < 0)
5836                         break;
5837
5838                 local_irq_disable();
5839                 /* Now, we're going to swap */
5840                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5841                         update_max_tr(tr, current, smp_processor_id());
5842                 else
5843                         update_max_tr_single(tr, current, iter->cpu_file);
5844                 local_irq_enable();
5845                 break;
5846         default:
5847                 if (tr->allocated_snapshot) {
5848                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5849                                 tracing_reset_online_cpus(&tr->max_buffer);
5850                         else
5851                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5852                 }
5853                 break;
5854         }
5855
5856         if (ret >= 0) {
5857                 *ppos += cnt;
5858                 ret = cnt;
5859         }
5860 out:
5861         mutex_unlock(&trace_types_lock);
5862         return ret;
5863 }
5864
5865 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5866 {
5867         struct seq_file *m = file->private_data;
5868         int ret;
5869
5870         ret = tracing_release(inode, file);
5871
5872         if (file->f_mode & FMODE_READ)
5873                 return ret;
5874
5875         /* If write only, the seq_file is just a stub */
5876         if (m)
5877                 kfree(m->private);
5878         kfree(m);
5879
5880         return 0;
5881 }
5882
5883 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5884 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5885                                     size_t count, loff_t *ppos);
5886 static int tracing_buffers_release(struct inode *inode, struct file *file);
5887 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5888                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5889
5890 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5891 {
5892         struct ftrace_buffer_info *info;
5893         int ret;
5894
5895         ret = tracing_buffers_open(inode, filp);
5896         if (ret < 0)
5897                 return ret;
5898
5899         info = filp->private_data;
5900
5901         if (info->iter.trace->use_max_tr) {
5902                 tracing_buffers_release(inode, filp);
5903                 return -EBUSY;
5904         }
5905
5906         info->iter.snapshot = true;
5907         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5908
5909         return ret;
5910 }
5911
5912 #endif /* CONFIG_TRACER_SNAPSHOT */
5913
5914
5915 static const struct file_operations tracing_thresh_fops = {
5916         .open           = tracing_open_generic,
5917         .read           = tracing_thresh_read,
5918         .write          = tracing_thresh_write,
5919         .llseek         = generic_file_llseek,
5920 };
5921
5922 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5923 static const struct file_operations tracing_max_lat_fops = {
5924         .open           = tracing_open_generic,
5925         .read           = tracing_max_lat_read,
5926         .write          = tracing_max_lat_write,
5927         .llseek         = generic_file_llseek,
5928 };
5929 #endif
5930
5931 static const struct file_operations set_tracer_fops = {
5932         .open           = tracing_open_generic,
5933         .read           = tracing_set_trace_read,
5934         .write          = tracing_set_trace_write,
5935         .llseek         = generic_file_llseek,
5936 };
5937
5938 static const struct file_operations tracing_pipe_fops = {
5939         .open           = tracing_open_pipe,
5940         .poll           = tracing_poll_pipe,
5941         .read           = tracing_read_pipe,
5942         .splice_read    = tracing_splice_read_pipe,
5943         .release        = tracing_release_pipe,
5944         .llseek         = no_llseek,
5945 };
5946
5947 static const struct file_operations tracing_entries_fops = {
5948         .open           = tracing_open_generic_tr,
5949         .read           = tracing_entries_read,
5950         .write          = tracing_entries_write,
5951         .llseek         = generic_file_llseek,
5952         .release        = tracing_release_generic_tr,
5953 };
5954
5955 static const struct file_operations tracing_total_entries_fops = {
5956         .open           = tracing_open_generic_tr,
5957         .read           = tracing_total_entries_read,
5958         .llseek         = generic_file_llseek,
5959         .release        = tracing_release_generic_tr,
5960 };
5961
5962 static const struct file_operations tracing_free_buffer_fops = {
5963         .open           = tracing_open_generic_tr,
5964         .write          = tracing_free_buffer_write,
5965         .release        = tracing_free_buffer_release,
5966 };
5967
5968 static const struct file_operations tracing_mark_fops = {
5969         .open           = tracing_open_generic_tr,
5970         .write          = tracing_mark_write,
5971         .llseek         = generic_file_llseek,
5972         .release        = tracing_release_generic_tr,
5973 };
5974
5975 static const struct file_operations trace_clock_fops = {
5976         .open           = tracing_clock_open,
5977         .read           = seq_read,
5978         .llseek         = seq_lseek,
5979         .release        = tracing_single_release_tr,
5980         .write          = tracing_clock_write,
5981 };
5982
5983 #ifdef CONFIG_TRACER_SNAPSHOT
5984 static const struct file_operations snapshot_fops = {
5985         .open           = tracing_snapshot_open,
5986         .read           = seq_read,
5987         .write          = tracing_snapshot_write,
5988         .llseek         = tracing_lseek,
5989         .release        = tracing_snapshot_release,
5990 };
5991
5992 static const struct file_operations snapshot_raw_fops = {
5993         .open           = snapshot_raw_open,
5994         .read           = tracing_buffers_read,
5995         .release        = tracing_buffers_release,
5996         .splice_read    = tracing_buffers_splice_read,
5997         .llseek         = no_llseek,
5998 };
5999
6000 #endif /* CONFIG_TRACER_SNAPSHOT */
6001
6002 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6003 {
6004         struct trace_array *tr = inode->i_private;
6005         struct ftrace_buffer_info *info;
6006         int ret;
6007
6008         if (tracing_disabled)
6009                 return -ENODEV;
6010
6011         if (trace_array_get(tr) < 0)
6012                 return -ENODEV;
6013
6014         info = kzalloc(sizeof(*info), GFP_KERNEL);
6015         if (!info) {
6016                 trace_array_put(tr);
6017                 return -ENOMEM;
6018         }
6019
6020         mutex_lock(&trace_types_lock);
6021
6022         info->iter.tr           = tr;
6023         info->iter.cpu_file     = tracing_get_cpu(inode);
6024         info->iter.trace        = tr->current_trace;
6025         info->iter.trace_buffer = &tr->trace_buffer;
6026         info->spare             = NULL;
6027         /* Force reading ring buffer for first read */
6028         info->read              = (unsigned int)-1;
6029
6030         filp->private_data = info;
6031
6032         tr->current_trace->ref++;
6033
6034         mutex_unlock(&trace_types_lock);
6035
6036         ret = nonseekable_open(inode, filp);
6037         if (ret < 0)
6038                 trace_array_put(tr);
6039
6040         return ret;
6041 }
6042
6043 static unsigned int
6044 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6045 {
6046         struct ftrace_buffer_info *info = filp->private_data;
6047         struct trace_iterator *iter = &info->iter;
6048
6049         return trace_poll(iter, filp, poll_table);
6050 }
6051
6052 static ssize_t
6053 tracing_buffers_read(struct file *filp, char __user *ubuf,
6054                      size_t count, loff_t *ppos)
6055 {
6056         struct ftrace_buffer_info *info = filp->private_data;
6057         struct trace_iterator *iter = &info->iter;
6058         ssize_t ret;
6059         ssize_t size;
6060
6061         if (!count)
6062                 return 0;
6063
6064 #ifdef CONFIG_TRACER_MAX_TRACE
6065         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6066                 return -EBUSY;
6067 #endif
6068
6069         if (!info->spare)
6070                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6071                                                           iter->cpu_file);
6072         if (!info->spare)
6073                 return -ENOMEM;
6074
6075         /* Do we have previous read data to read? */
6076         if (info->read < PAGE_SIZE)
6077                 goto read;
6078
6079  again:
6080         trace_access_lock(iter->cpu_file);
6081         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6082                                     &info->spare,
6083                                     count,
6084                                     iter->cpu_file, 0);
6085         trace_access_unlock(iter->cpu_file);
6086
6087         if (ret < 0) {
6088                 if (trace_empty(iter)) {
6089                         if ((filp->f_flags & O_NONBLOCK))
6090                                 return -EAGAIN;
6091
6092                         ret = wait_on_pipe(iter, false);
6093                         if (ret)
6094                                 return ret;
6095
6096                         goto again;
6097                 }
6098                 return 0;
6099         }
6100
6101         info->read = 0;
6102  read:
6103         size = PAGE_SIZE - info->read;
6104         if (size > count)
6105                 size = count;
6106
6107         ret = copy_to_user(ubuf, info->spare + info->read, size);
6108         if (ret == size)
6109                 return -EFAULT;
6110
6111         size -= ret;
6112
6113         *ppos += size;
6114         info->read += size;
6115
6116         return size;
6117 }
6118
6119 static int tracing_buffers_release(struct inode *inode, struct file *file)
6120 {
6121         struct ftrace_buffer_info *info = file->private_data;
6122         struct trace_iterator *iter = &info->iter;
6123
6124         mutex_lock(&trace_types_lock);
6125
6126         iter->tr->current_trace->ref--;
6127
6128         __trace_array_put(iter->tr);
6129
6130         if (info->spare)
6131                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6132         kfree(info);
6133
6134         mutex_unlock(&trace_types_lock);
6135
6136         return 0;
6137 }
6138
6139 struct buffer_ref {
6140         struct ring_buffer      *buffer;
6141         void                    *page;
6142         int                     ref;
6143 };
6144
6145 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6146                                     struct pipe_buffer *buf)
6147 {
6148         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6149
6150         if (--ref->ref)
6151                 return;
6152
6153         ring_buffer_free_read_page(ref->buffer, ref->page);
6154         kfree(ref);
6155         buf->private = 0;
6156 }
6157
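     /*
      * Take another reference on the page attached to a pipe buffer.
      * Refuse once the count gets close to INT_MAX to keep it from
      * overflowing.
      */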
6158 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6159                                 struct pipe_buffer *buf)
6160 {
6161         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6162
6163         if (ref->ref > INT_MAX/2)
6164                 return false;
6165
6166         ref->ref++;
6167         return true;
6168 }
6169
6170 /* Pipe buffer operations for a buffer. */
6171 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6172         .can_merge              = 0,
6173         .confirm                = generic_pipe_buf_confirm,
6174         .release                = buffer_pipe_buf_release,
6175         .steal                  = generic_pipe_buf_steal,
6176         .get                    = buffer_pipe_buf_get,
6177 };
6178
6179 /*
6180  * Callback from splice_to_pipe(); releases pages left at the end of
6181  * the spd in case we errored out while filling the pipe.
6182  */
6183 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6184 {
6185         struct buffer_ref *ref =
6186                 (struct buffer_ref *)spd->partial[i].private;
6187
6188         if (--ref->ref)
6189                 return;
6190
6191         ring_buffer_free_read_page(ref->buffer, ref->page);
6192         kfree(ref);
6193         spd->partial[i].private = 0;
6194 }
6195
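     /*
      * splice_read handler for the per-cpu "trace_pipe_raw" file: hands
      * whole ring buffer pages to the pipe without copying, using a
      * buffer_ref to keep each page alive until the pipe side releases it.
      */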
6196 static ssize_t
6197 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6198                             struct pipe_inode_info *pipe, size_t len,
6199                             unsigned int flags)
6200 {
6201         struct ftrace_buffer_info *info = file->private_data;
6202         struct trace_iterator *iter = &info->iter;
6203         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6204         struct page *pages_def[PIPE_DEF_BUFFERS];
6205         struct splice_pipe_desc spd = {
6206                 .pages          = pages_def,
6207                 .partial        = partial_def,
6208                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6209                 .flags          = flags,
6210                 .ops            = &buffer_pipe_buf_ops,
6211                 .spd_release    = buffer_spd_release,
6212         };
6213         struct buffer_ref *ref;
6214         int entries, i;
6215         ssize_t ret = 0;
6216
6217 #ifdef CONFIG_TRACER_MAX_TRACE
6218         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6219                 return -EBUSY;
6220 #endif
6221
6222         if (*ppos & (PAGE_SIZE - 1))
6223                 return -EINVAL;
6224
6225         if (len & (PAGE_SIZE - 1)) {
6226                 if (len < PAGE_SIZE)
6227                         return -EINVAL;
6228                 len &= PAGE_MASK;
6229         }
6230
6231         if (splice_grow_spd(pipe, &spd))
6232                 return -ENOMEM;
6233
6234  again:
6235         trace_access_lock(iter->cpu_file);
6236         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6237
6238         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6239                 struct page *page;
6240                 int r;
6241
6242                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6243                 if (!ref) {
6244                         ret = -ENOMEM;
6245                         break;
6246                 }
6247
6248                 ref->ref = 1;
6249                 ref->buffer = iter->trace_buffer->buffer;
6250                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6251                 if (!ref->page) {
6252                         ret = -ENOMEM;
6253                         kfree(ref);
6254                         break;
6255                 }
6256
6257                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6258                                           len, iter->cpu_file, 1);
6259                 if (r < 0) {
6260                         ring_buffer_free_read_page(ref->buffer, ref->page);
6261                         kfree(ref);
6262                         break;
6263                 }
6264
6265                 page = virt_to_page(ref->page);
6266
6267                 spd.pages[i] = page;
6268                 spd.partial[i].len = PAGE_SIZE;
6269                 spd.partial[i].offset = 0;
6270                 spd.partial[i].private = (unsigned long)ref;
6271                 spd.nr_pages++;
6272                 *ppos += PAGE_SIZE;
6273
6274                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6275         }
6276
6277         trace_access_unlock(iter->cpu_file);
6278         spd.nr_pages = i;
6279
6280         /* did we read anything? */
6281         if (!spd.nr_pages) {
6282                 if (ret)
6283                         goto out;
6284
6285                 ret = -EAGAIN;
6286                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6287                         goto out;
6288
6289                 ret = wait_on_pipe(iter, true);
6290                 if (ret)
6291                         goto out;
6292
6293                 goto again;
6294         }
6295
6296         ret = splice_to_pipe(pipe, &spd);
6297 out:
6298         splice_shrink_spd(&spd);
6299
6300         return ret;
6301 }
6302
6303 static const struct file_operations tracing_buffers_fops = {
6304         .open           = tracing_buffers_open,
6305         .read           = tracing_buffers_read,
6306         .poll           = tracing_buffers_poll,
6307         .release        = tracing_buffers_release,
6308         .splice_read    = tracing_buffers_splice_read,
6309         .llseek         = no_llseek,
6310 };
6311
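     /*
      * Read handler for the per-cpu "stats" file: reports entry, overrun,
      * byte and timestamp statistics for one CPU's ring buffer.
      */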
6312 static ssize_t
6313 tracing_stats_read(struct file *filp, char __user *ubuf,
6314                    size_t count, loff_t *ppos)
6315 {
6316         struct inode *inode = file_inode(filp);
6317         struct trace_array *tr = inode->i_private;
6318         struct trace_buffer *trace_buf = &tr->trace_buffer;
6319         int cpu = tracing_get_cpu(inode);
6320         struct trace_seq *s;
6321         unsigned long cnt;
6322         unsigned long long t;
6323         unsigned long usec_rem;
6324
6325         s = kmalloc(sizeof(*s), GFP_KERNEL);
6326         if (!s)
6327                 return -ENOMEM;
6328
6329         trace_seq_init(s);
6330
6331         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6332         trace_seq_printf(s, "entries: %ld\n", cnt);
6333
6334         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6335         trace_seq_printf(s, "overrun: %ld\n", cnt);
6336
6337         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6338         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6339
6340         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6341         trace_seq_printf(s, "bytes: %ld\n", cnt);
6342
6343         if (trace_clocks[tr->clock_id].in_ns) {
6344                 /* local or global for trace_clock */
6345                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6346                 usec_rem = do_div(t, USEC_PER_SEC);
6347                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6348                                                                 t, usec_rem);
6349
6350                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6351                 usec_rem = do_div(t, USEC_PER_SEC);
6352                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6353         } else {
6354                 /* counter or tsc mode for trace_clock */
6355                 trace_seq_printf(s, "oldest event ts: %llu\n",
6356                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6357
6358                 trace_seq_printf(s, "now ts: %llu\n",
6359                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6360         }
6361
6362         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6363         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6364
6365         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6366         trace_seq_printf(s, "read events: %ld\n", cnt);
6367
6368         count = simple_read_from_buffer(ubuf, count, ppos,
6369                                         s->buffer, trace_seq_used(s));
6370
6371         kfree(s);
6372
6373         return count;
6374 }
6375
6376 static const struct file_operations tracing_stats_fops = {
6377         .open           = tracing_open_generic_tr,
6378         .read           = tracing_stats_read,
6379         .llseek         = generic_file_llseek,
6380         .release        = tracing_release_generic_tr,
6381 };
6382
6383 #ifdef CONFIG_DYNAMIC_FTRACE
6384
6385 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6386 {
6387         return 0;
6388 }
6389
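/*
 * Read handler for "dyn_ftrace_total_info": prints the count stored in
 * the file's private data (in practice the number of functions touched
 * by dynamic ftrace), followed by any architecture specific details
 * from ftrace_arch_read_dyn_info().
 */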
6390 static ssize_t
6391 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6392                   size_t cnt, loff_t *ppos)
6393 {
6394         static char ftrace_dyn_info_buffer[1024];
6395         static DEFINE_MUTEX(dyn_info_mutex);
6396         unsigned long *p = filp->private_data;
6397         char *buf = ftrace_dyn_info_buffer;
6398         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6399         int r;
6400
6401         mutex_lock(&dyn_info_mutex);
6402         r = sprintf(buf, "%ld ", *p);
6403
6404         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6405         buf[r++] = '\n';
6406
6407         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6408
6409         mutex_unlock(&dyn_info_mutex);
6410
6411         return r;
6412 }
6413
6414 static const struct file_operations tracing_dyn_info_fops = {
6415         .open           = tracing_open_generic,
6416         .read           = tracing_read_dyn_info,
6417         .llseek         = generic_file_llseek,
6418 };
6419 #endif /* CONFIG_DYNAMIC_FTRACE */
6420
6421 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6422 static void
6423 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6424 {
6425         tracing_snapshot();
6426 }
6427
6428 static void
6429 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6430 {
6431         unsigned long *count = (long *)data;
6432
6433         if (!*count)
6434                 return;
6435
6436         if (*count != -1)
6437                 (*count)--;
6438
6439         tracing_snapshot();
6440 }
6441
6442 static int
6443 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6444                       struct ftrace_probe_ops *ops, void *data)
6445 {
6446         long count = (long)data;
6447
6448         seq_printf(m, "%ps:", (void *)ip);
6449
6450         seq_puts(m, "snapshot");
6451
6452         if (count == -1)
6453                 seq_puts(m, ":unlimited\n");
6454         else
6455                 seq_printf(m, ":count=%ld\n", count);
6456
6457         return 0;
6458 }
6459
6460 static struct ftrace_probe_ops snapshot_probe_ops = {
6461         .func                   = ftrace_snapshot,
6462         .print                  = ftrace_snapshot_print,
6463 };
6464
6465 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6466         .func                   = ftrace_count_snapshot,
6467         .print                  = ftrace_snapshot_print,
6468 };
6469
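/*
 * Parser for the "snapshot" function command. Writing
 * "<function>:snapshot[:count]" to set_ftrace_filter registers a probe
 * that takes a snapshot when <function> is hit, e.g. (a usage sketch;
 * the function name is only an example):
 *
 *   echo 'do_fork:snapshot:1' > set_ftrace_filter
 *
 * Prefixing the glob with '!' removes the probe again.
 */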
6470 static int
6471 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6472                                char *glob, char *cmd, char *param, int enable)
6473 {
6474         struct ftrace_probe_ops *ops;
6475         void *count = (void *)-1;
6476         char *number;
6477         int ret;
6478
6479         /* hash funcs only work with set_ftrace_filter */
6480         if (!enable)
6481                 return -EINVAL;
6482
6483         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6484
6485         if (glob[0] == '!') {
6486                 unregister_ftrace_function_probe_func(glob+1, ops);
6487                 return 0;
6488         }
6489
6490         if (!param)
6491                 goto out_reg;
6492
6493         number = strsep(&param, ":");
6494
6495         if (!strlen(number))
6496                 goto out_reg;
6497
6498         /*
6499          * We use the callback data field (which is a pointer)
6500          * as our counter.
6501          */
6502         ret = kstrtoul(number, 0, (unsigned long *)&count);
6503         if (ret)
6504                 return ret;
6505
6506  out_reg:
6507         ret = alloc_snapshot(&global_trace);
6508         if (ret < 0)
6509                 goto out;
6510
6511         ret = register_ftrace_function_probe(glob, ops, count);
6512
6513  out:
6514         return ret < 0 ? ret : 0;
6515 }
6516
6517 static struct ftrace_func_command ftrace_snapshot_cmd = {
6518         .name                   = "snapshot",
6519         .func                   = ftrace_trace_snapshot_callback,
6520 };
6521
6522 static __init int register_snapshot_cmd(void)
6523 {
6524         return register_ftrace_command(&ftrace_snapshot_cmd);
6525 }
6526 #else
6527 static inline __init int register_snapshot_cmd(void) { return 0; }
6528 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6529
6530 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6531 {
6532         if (WARN_ON(!tr->dir))
6533                 return ERR_PTR(-ENODEV);
6534
6535         /* Top directory uses NULL as the parent */
6536         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6537                 return NULL;
6538
6539         /* All sub buffers have a descriptor */
6540         return tr->dir;
6541 }
6542
6543 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6544 {
6545         struct dentry *d_tracer;
6546
6547         if (tr->percpu_dir)
6548                 return tr->percpu_dir;
6549
6550         d_tracer = tracing_get_dentry(tr);
6551         if (IS_ERR(d_tracer))
6552                 return NULL;
6553
6554         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6555
6556         WARN_ONCE(!tr->percpu_dir,
6557                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6558
6559         return tr->percpu_dir;
6560 }
6561
6562 static struct dentry *
6563 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6564                       void *data, long cpu, const struct file_operations *fops)
6565 {
6566         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6567
6568         if (ret) /* See tracing_get_cpu() */
6569                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6570         return ret;
6571 }
6572
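/*
 * Create the per_cpu/cpuN/ directory for one CPU and populate it with
 * the per-CPU views of the trace data: trace, trace_pipe,
 * trace_pipe_raw, stats, buffer_size_kb and, if configured, the
 * snapshot files.
 */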
6573 static void
6574 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6575 {
6576         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6577         struct dentry *d_cpu;
6578         char cpu_dir[30]; /* 30 characters should be more than enough */
6579
6580         if (!d_percpu)
6581                 return;
6582
6583         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6584         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6585         if (!d_cpu) {
6586                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6587                 return;
6588         }
6589
6590         /* per cpu trace_pipe */
6591         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6592                                 tr, cpu, &tracing_pipe_fops);
6593
6594         /* per cpu trace */
6595         trace_create_cpu_file("trace", 0644, d_cpu,
6596                                 tr, cpu, &tracing_fops);
6597
6598         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6599                                 tr, cpu, &tracing_buffers_fops);
6600
6601         trace_create_cpu_file("stats", 0444, d_cpu,
6602                                 tr, cpu, &tracing_stats_fops);
6603
6604         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6605                                 tr, cpu, &tracing_entries_fops);
6606
6607 #ifdef CONFIG_TRACER_SNAPSHOT
6608         trace_create_cpu_file("snapshot", 0644, d_cpu,
6609                                 tr, cpu, &snapshot_fops);
6610
6611         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6612                                 tr, cpu, &snapshot_raw_fops);
6613 #endif
6614 }
6615
6616 #ifdef CONFIG_FTRACE_SELFTEST
6617 /* Let selftest have access to static functions in this file */
6618 #include "trace_selftest.c"
6619 #endif
6620
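/*
 * Read/write handlers for the per-tracer option files created under
 * options/. Each file reflects one tracer_opt bit; writing 0 or 1
 * clears or sets it through __set_tracer_option().
 */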
6621 static ssize_t
6622 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6623                         loff_t *ppos)
6624 {
6625         struct trace_option_dentry *topt = filp->private_data;
6626         char *buf;
6627
6628         if (topt->flags->val & topt->opt->bit)
6629                 buf = "1\n";
6630         else
6631                 buf = "0\n";
6632
6633         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6634 }
6635
6636 static ssize_t
6637 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6638                          loff_t *ppos)
6639 {
6640         struct trace_option_dentry *topt = filp->private_data;
6641         unsigned long val;
6642         int ret;
6643
6644         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6645         if (ret)
6646                 return ret;
6647
6648         if (val != 0 && val != 1)
6649                 return -EINVAL;
6650
6651         if (!!(topt->flags->val & topt->opt->bit) != val) {
6652                 mutex_lock(&trace_types_lock);
6653                 ret = __set_tracer_option(topt->tr, topt->flags,
6654                                           topt->opt, !val);
6655                 mutex_unlock(&trace_types_lock);
6656                 if (ret)
6657                         return ret;
6658         }
6659
6660         *ppos += cnt;
6661
6662         return cnt;
6663 }
6664
6665
6666 static const struct file_operations trace_options_fops = {
6667         .open = tracing_open_generic,
6668         .read = trace_options_read,
6669         .write = trace_options_write,
6670         .llseek = generic_file_llseek,
6671 };
6672
6673 /*
6674  * In order to pass in both the trace_array descriptor as well as the index
6675  * to the flag that the trace option file represents, the trace_array
6676  * has a character array of trace_flags_index[], which holds the index
6677  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6678  * The address of the corresponding element of this array is passed to
6679  * the flag option file read/write callbacks.
6680  *
6681  * In order to extract both the index and the trace_array descriptor,
6682  * get_tr_index() uses the following algorithm.
6683  *
6684  *   idx = *ptr;
6685  *
6686  * Since the pointer points into that index array, dereferencing it
6687  * yields the index value directly (remember index[1] == 1).
6688  *
6689  * Then, to get the trace_array descriptor, subtract that index value
6690  * from the pointer to reach the start of the array.
6691  *
6692  *   ptr - idx == &index[0]
6693  *
6694  * Then a simple container_of() from that pointer gets us to the
6695  * trace_array descriptor.
6696  */
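/*
 * For example, if data points at trace_flags_index[3], then *data == 3
 * and data - 3 == &trace_flags_index[0], from which container_of()
 * recovers the enclosing trace_array.
 */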
6697 static void get_tr_index(void *data, struct trace_array **ptr,
6698                          unsigned int *pindex)
6699 {
6700         *pindex = *(unsigned char *)data;
6701
6702         *ptr = container_of(data - *pindex, struct trace_array,
6703                             trace_flags_index);
6704 }
6705
6706 static ssize_t
6707 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6708                         loff_t *ppos)
6709 {
6710         void *tr_index = filp->private_data;
6711         struct trace_array *tr;
6712         unsigned int index;
6713         char *buf;
6714
6715         get_tr_index(tr_index, &tr, &index);
6716
6717         if (tr->trace_flags & (1 << index))
6718                 buf = "1\n";
6719         else
6720                 buf = "0\n";
6721
6722         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6723 }
6724
6725 static ssize_t
6726 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6727                          loff_t *ppos)
6728 {
6729         void *tr_index = filp->private_data;
6730         struct trace_array *tr;
6731         unsigned int index;
6732         unsigned long val;
6733         int ret;
6734
6735         get_tr_index(tr_index, &tr, &index);
6736
6737         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6738         if (ret)
6739                 return ret;
6740
6741         if (val != 0 && val != 1)
6742                 return -EINVAL;
6743
6744         mutex_lock(&trace_types_lock);
6745         ret = set_tracer_flag(tr, 1 << index, val);
6746         mutex_unlock(&trace_types_lock);
6747
6748         if (ret < 0)
6749                 return ret;
6750
6751         *ppos += cnt;
6752
6753         return cnt;
6754 }
6755
6756 static const struct file_operations trace_options_core_fops = {
6757         .open = tracing_open_generic,
6758         .read = trace_options_core_read,
6759         .write = trace_options_core_write,
6760         .llseek = generic_file_llseek,
6761 };
6762
6763 struct dentry *trace_create_file(const char *name,
6764                                  umode_t mode,
6765                                  struct dentry *parent,
6766                                  void *data,
6767                                  const struct file_operations *fops)
6768 {
6769         struct dentry *ret;
6770
6771         ret = tracefs_create_file(name, mode, parent, data, fops);
6772         if (!ret)
6773                 pr_warn("Could not create tracefs '%s' entry\n", name);
6774
6775         return ret;
6776 }
6777
6778
6779 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6780 {
6781         struct dentry *d_tracer;
6782
6783         if (tr->options)
6784                 return tr->options;
6785
6786         d_tracer = tracing_get_dentry(tr);
6787         if (IS_ERR(d_tracer))
6788                 return NULL;
6789
6790         tr->options = tracefs_create_dir("options", d_tracer);
6791         if (!tr->options) {
6792                 pr_warn("Could not create tracefs directory 'options'\n");
6793                 return NULL;
6794         }
6795
6796         return tr->options;
6797 }
6798
6799 static void
6800 create_trace_option_file(struct trace_array *tr,
6801                          struct trace_option_dentry *topt,
6802                          struct tracer_flags *flags,
6803                          struct tracer_opt *opt)
6804 {
6805         struct dentry *t_options;
6806
6807         t_options = trace_options_init_dentry(tr);
6808         if (!t_options)
6809                 return;
6810
6811         topt->flags = flags;
6812         topt->opt = opt;
6813         topt->tr = tr;
6814
6815         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6816                                     &trace_options_fops);
6817
6818 }
6819
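/*
 * Create one options/ file per flag of the given tracer and remember
 * them in tr->topts so the files' memory can be released when the
 * instance is torn down.
 */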
6820 static void
6821 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6822 {
6823         struct trace_option_dentry *topts;
6824         struct trace_options *tr_topts;
6825         struct tracer_flags *flags;
6826         struct tracer_opt *opts;
6827         int cnt;
6828         int i;
6829
6830         if (!tracer)
6831                 return;
6832
6833         flags = tracer->flags;
6834
6835         if (!flags || !flags->opts)
6836                 return;
6837
6838         /*
6839          * If this is an instance, only create flags for tracers
6840          * the instance may have.
6841          */
6842         if (!trace_ok_for_array(tracer, tr))
6843                 return;
6844
6845         for (i = 0; i < tr->nr_topts; i++) {
6846                 /* Make sure there are no duplicate flags. */
6847                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6848                         return;
6849         }
6850
6851         opts = flags->opts;
6852
6853         for (cnt = 0; opts[cnt].name; cnt++)
6854                 ;
6855
6856         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6857         if (!topts)
6858                 return;
6859
6860         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6861                             GFP_KERNEL);
6862         if (!tr_topts) {
6863                 kfree(topts);
6864                 return;
6865         }
6866
6867         tr->topts = tr_topts;
6868         tr->topts[tr->nr_topts].tracer = tracer;
6869         tr->topts[tr->nr_topts].topts = topts;
6870         tr->nr_topts++;
6871
6872         for (cnt = 0; opts[cnt].name; cnt++) {
6873                 create_trace_option_file(tr, &topts[cnt], flags,
6874                                          &opts[cnt]);
6875                 WARN_ONCE(topts[cnt].entry == NULL,
6876                           "Failed to create trace option: %s",
6877                           opts[cnt].name);
6878         }
6879 }
6880
6881 static struct dentry *
6882 create_trace_option_core_file(struct trace_array *tr,
6883                               const char *option, long index)
6884 {
6885         struct dentry *t_options;
6886
6887         t_options = trace_options_init_dentry(tr);
6888         if (!t_options)
6889                 return NULL;
6890
6891         return trace_create_file(option, 0644, t_options,
6892                                  (void *)&tr->trace_flags_index[index],
6893                                  &trace_options_core_fops);
6894 }
6895
6896 static void create_trace_options_dir(struct trace_array *tr)
6897 {
6898         struct dentry *t_options;
6899         bool top_level = tr == &global_trace;
6900         int i;
6901
6902         t_options = trace_options_init_dentry(tr);
6903         if (!t_options)
6904                 return;
6905
6906         for (i = 0; trace_options[i]; i++) {
6907                 if (top_level ||
6908                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6909                         create_trace_option_core_file(tr, trace_options[i], i);
6910         }
6911 }
6912
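/*
 * "tracing_on" file: reading returns whether the ring buffer of this
 * trace array is currently recording; writing 0 or 1 turns recording
 * off or on, e.g. (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */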
6913 static ssize_t
6914 rb_simple_read(struct file *filp, char __user *ubuf,
6915                size_t cnt, loff_t *ppos)
6916 {
6917         struct trace_array *tr = filp->private_data;
6918         char buf[64];
6919         int r;
6920
6921         r = tracer_tracing_is_on(tr);
6922         r = sprintf(buf, "%d\n", r);
6923
6924         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6925 }
6926
6927 static ssize_t
6928 rb_simple_write(struct file *filp, const char __user *ubuf,
6929                 size_t cnt, loff_t *ppos)
6930 {
6931         struct trace_array *tr = filp->private_data;
6932         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6933         unsigned long val;
6934         int ret;
6935
6936         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6937         if (ret)
6938                 return ret;
6939
6940         if (buffer) {
6941                 mutex_lock(&trace_types_lock);
6942                 if (!!val == tracer_tracing_is_on(tr)) {
6943                         val = 0; /* do nothing */
6944                 } else if (val) {
6945                         tracer_tracing_on(tr);
6946                         if (tr->current_trace->start)
6947                                 tr->current_trace->start(tr);
6948                 } else {
6949                         tracer_tracing_off(tr);
6950                         if (tr->current_trace->stop)
6951                                 tr->current_trace->stop(tr);
6952                 }
6953                 mutex_unlock(&trace_types_lock);
6954         }
6955
6956         (*ppos)++;
6957
6958         return cnt;
6959 }
6960
6961 static const struct file_operations rb_simple_fops = {
6962         .open           = tracing_open_generic_tr,
6963         .read           = rb_simple_read,
6964         .write          = rb_simple_write,
6965         .release        = tracing_release_generic_tr,
6966         .llseek         = default_llseek,
6967 };
6968
6969 struct dentry *trace_instance_dir;
6970
6971 static void
6972 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6973
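/*
 * Allocate the ring buffer and the per-CPU bookkeeping data for one
 * trace_buffer. On failure, anything already allocated is freed and
 * -ENOMEM is returned.
 */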
6974 static int
6975 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6976 {
6977         enum ring_buffer_flags rb_flags;
6978
6979         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6980
6981         buf->tr = tr;
6982
6983         buf->buffer = ring_buffer_alloc(size, rb_flags);
6984         if (!buf->buffer)
6985                 return -ENOMEM;
6986
6987         buf->data = alloc_percpu(struct trace_array_cpu);
6988         if (!buf->data) {
6989                 ring_buffer_free(buf->buffer);
6990                 buf->buffer = NULL;
6991                 return -ENOMEM;
6992         }
6993
6994         /* Allocate the first page for all buffers */
6995         set_buffer_entries(&tr->trace_buffer,
6996                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6997
6998         return 0;
6999 }
7000
7001 static int allocate_trace_buffers(struct trace_array *tr, int size)
7002 {
7003         int ret;
7004
7005         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7006         if (ret)
7007                 return ret;
7008
7009 #ifdef CONFIG_TRACER_MAX_TRACE
7010         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7011                                     allocate_snapshot ? size : 1);
7012         if (WARN_ON(ret)) {
7013                 ring_buffer_free(tr->trace_buffer.buffer);
7014                 tr->trace_buffer.buffer = NULL;
7015                 free_percpu(tr->trace_buffer.data);
7016                 tr->trace_buffer.data = NULL;
7017                 return -ENOMEM;
7018         }
7019         tr->allocated_snapshot = allocate_snapshot;
7020
7021         /*
7022          * Only the top level trace array gets its snapshot allocated
7023          * from the kernel command line.
7024          */
7025         allocate_snapshot = false;
7026 #endif
7027
7028         /*
7029          * Because of the way alloc_percpu() works on x86_64, the pgd
7030          * entries backing that vmalloc'd per-cpu memory may not yet be
7031          * present in every process's page tables. Trace events that fire
7032          * from the x86_64 page fault handler must not fault again while
7033          * touching alloc_percpu()'d memory, so sync the vmalloc mappings
7034          * up front. All other alloc_percpu() and vmalloc() calls in
7035          * tracing need the same auditing, because anything might get
7036          * touched from within a page fault trace event!
7037          */
7038         vmalloc_sync_mappings();
7039
7040         return 0;
7041 }
7042
7043 static void free_trace_buffer(struct trace_buffer *buf)
7044 {
7045         if (buf->buffer) {
7046                 ring_buffer_free(buf->buffer);
7047                 buf->buffer = NULL;
7048                 free_percpu(buf->data);
7049                 buf->data = NULL;
7050         }
7051 }
7052
7053 static void free_trace_buffers(struct trace_array *tr)
7054 {
7055         if (!tr)
7056                 return;
7057
7058         free_trace_buffer(&tr->trace_buffer);
7059
7060 #ifdef CONFIG_TRACER_MAX_TRACE
7061         free_trace_buffer(&tr->max_buffer);
7062 #endif
7063 }
7064
7065 static void init_trace_flags_index(struct trace_array *tr)
7066 {
7067         int i;
7068
7069         /* Used by the trace options files */
7070         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7071                 tr->trace_flags_index[i] = i;
7072 }
7073
7074 static void __update_tracer_options(struct trace_array *tr)
7075 {
7076         struct tracer *t;
7077
7078         for (t = trace_types; t; t = t->next)
7079                 add_tracer_options(tr, t);
7080 }
7081
7082 static void update_tracer_options(struct trace_array *tr)
7083 {
7084         mutex_lock(&trace_types_lock);
7085         __update_tracer_options(tr);
7086         mutex_unlock(&trace_types_lock);
7087 }
7088
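/*
 * Back end for mkdir in the "instances" directory. Creating a
 * directory there allocates a new trace_array with its own buffers
 * and tracefs files, e.g. (assuming tracefs is mounted at
 * /sys/kernel/tracing; "foo" is only an example name):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 */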
7089 static int instance_mkdir(const char *name)
7090 {
7091         struct trace_array *tr;
7092         int ret;
7093
7094         mutex_lock(&trace_types_lock);
7095
7096         ret = -EEXIST;
7097         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7098                 if (tr->name && strcmp(tr->name, name) == 0)
7099                         goto out_unlock;
7100         }
7101
7102         ret = -ENOMEM;
7103         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7104         if (!tr)
7105                 goto out_unlock;
7106
7107         tr->name = kstrdup(name, GFP_KERNEL);
7108         if (!tr->name)
7109                 goto out_free_tr;
7110
7111         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7112                 goto out_free_tr;
7113
7114         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7115
7116         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7117
7118         raw_spin_lock_init(&tr->start_lock);
7119
7120         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7121
7122         tr->current_trace = &nop_trace;
7123
7124         INIT_LIST_HEAD(&tr->systems);
7125         INIT_LIST_HEAD(&tr->events);
7126
7127         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7128                 goto out_free_tr;
7129
7130         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7131         if (!tr->dir)
7132                 goto out_free_tr;
7133
7134         ret = event_trace_add_tracer(tr->dir, tr);
7135         if (ret) {
7136                 tracefs_remove_recursive(tr->dir);
7137                 goto out_free_tr;
7138         }
7139
7140         init_tracer_tracefs(tr, tr->dir);
7141         init_trace_flags_index(tr);
7142         __update_tracer_options(tr);
7143
7144         list_add(&tr->list, &ftrace_trace_arrays);
7145
7146         mutex_unlock(&trace_types_lock);
7147
7148         return 0;
7149
7150  out_free_tr:
7151         free_trace_buffers(tr);
7152         free_cpumask_var(tr->tracing_cpumask);
7153         kfree(tr->name);
7154         kfree(tr);
7155
7156  out_unlock:
7157         mutex_unlock(&trace_types_lock);
7158
7159         return ret;
7160
7161 }
7162
7163 static int instance_rmdir(const char *name)
7164 {
7165         struct trace_array *tr;
7166         int found = 0;
7167         int ret;
7168         int i;
7169
7170         mutex_lock(&trace_types_lock);
7171
7172         ret = -ENODEV;
7173         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7174                 if (tr->name && strcmp(tr->name, name) == 0) {
7175                         found = 1;
7176                         break;
7177                 }
7178         }
7179         if (!found)
7180                 goto out_unlock;
7181
7182         ret = -EBUSY;
7183         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7184                 goto out_unlock;
7185
7186         list_del(&tr->list);
7187
7188         /* Disable all the flags that were enabled coming in */
7189         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7190                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7191                         set_tracer_flag(tr, 1 << i, 0);
7192         }
7193
7194         tracing_set_nop(tr);
7195         event_trace_del_tracer(tr);
7196         ftrace_clear_pids(tr);
7197         ftrace_destroy_function_files(tr);
7198         tracefs_remove_recursive(tr->dir);
7199         free_trace_buffers(tr);
7200
7201         for (i = 0; i < tr->nr_topts; i++) {
7202                 kfree(tr->topts[i].topts);
7203         }
7204         kfree(tr->topts);
7205
7206         free_cpumask_var(tr->tracing_cpumask);
7207         kfree(tr->name);
7208         kfree(tr);
7209
7210         ret = 0;
7211
7212  out_unlock:
7213         mutex_unlock(&trace_types_lock);
7214
7215         return ret;
7216 }
7217
7218 static __init void create_trace_instances(struct dentry *d_tracer)
7219 {
7220         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7221                                                          instance_mkdir,
7222                                                          instance_rmdir);
7223         if (WARN_ON(!trace_instance_dir))
7224                 return;
7225 }
7226
7227 static void
7228 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7229 {
7230         int cpu;
7231
7232         trace_create_file("available_tracers", 0444, d_tracer,
7233                         tr, &show_traces_fops);
7234
7235         trace_create_file("current_tracer", 0644, d_tracer,
7236                         tr, &set_tracer_fops);
7237
7238         trace_create_file("tracing_cpumask", 0644, d_tracer,
7239                           tr, &tracing_cpumask_fops);
7240
7241         trace_create_file("trace_options", 0644, d_tracer,
7242                           tr, &tracing_iter_fops);
7243
7244         trace_create_file("trace", 0644, d_tracer,
7245                           tr, &tracing_fops);
7246
7247         trace_create_file("trace_pipe", 0444, d_tracer,
7248                           tr, &tracing_pipe_fops);
7249
7250         trace_create_file("buffer_size_kb", 0644, d_tracer,
7251                           tr, &tracing_entries_fops);
7252
7253         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7254                           tr, &tracing_total_entries_fops);
7255
7256         trace_create_file("free_buffer", 0200, d_tracer,
7257                           tr, &tracing_free_buffer_fops);
7258
7259         trace_create_file("trace_marker", 0220, d_tracer,
7260                           tr, &tracing_mark_fops);
7261
7262         trace_create_file("trace_clock", 0644, d_tracer, tr,
7263                           &trace_clock_fops);
7264
7265         trace_create_file("tracing_on", 0644, d_tracer,
7266                           tr, &rb_simple_fops);
7267
7268         create_trace_options_dir(tr);
7269
7270 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7271         trace_create_file("tracing_max_latency", 0644, d_tracer,
7272                         &tr->max_latency, &tracing_max_lat_fops);
7273 #endif
7274
7275         if (ftrace_create_function_files(tr, d_tracer))
7276                 WARN(1, "Could not allocate function filter files");
7277
7278 #ifdef CONFIG_TRACER_SNAPSHOT
7279         trace_create_file("snapshot", 0644, d_tracer,
7280                           tr, &snapshot_fops);
7281 #endif
7282
7283         for_each_tracing_cpu(cpu)
7284                 tracing_init_tracefs_percpu(tr, cpu);
7285
7286         ftrace_init_tracefs(tr, d_tracer);
7287 }
7288
7289 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7290 {
7291         struct vfsmount *mnt;
7292         struct file_system_type *type;
7293
7294         /*
7295          * To maintain backward compatibility for tools that mount
7296          * debugfs to get to the tracing facility, tracefs is automatically
7297          * mounted to the debugfs/tracing directory.
7298          */
7299         type = get_fs_type("tracefs");
7300         if (!type)
7301                 return NULL;
7302         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7303         put_filesystem(type);
7304         if (IS_ERR(mnt))
7305                 return NULL;
7306         mntget(mnt);
7307
7308         return mnt;
7309 }
7310
7311 /**
7312  * tracing_init_dentry - initialize top level trace array
7313  *
7314  * This is called when creating files or directories in the tracing
7315  * directory. It is called via fs_initcall() by any of the boot up code
7316  * and expects to return the dentry of the top level tracing directory.
7317  */
7318 struct dentry *tracing_init_dentry(void)
7319 {
7320         struct trace_array *tr = &global_trace;
7321
7322         /* The top level trace array uses NULL as parent */
7323         if (tr->dir)
7324                 return NULL;
7325
7326         if (WARN_ON(!tracefs_initialized()) ||
7327                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7328                  WARN_ON(!debugfs_initialized())))
7329                 return ERR_PTR(-ENODEV);
7330
7331         /*
7332          * As there may still be users that expect the tracing
7333          * files to exist in debugfs/tracing, we must automount
7334          * the tracefs file system there, so older tools still
7335          * work with the newer kernel.
7336          */
7337         tr->dir = debugfs_create_automount("tracing", NULL,
7338                                            trace_automount, NULL);
7339         if (!tr->dir) {
7340                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7341                 return ERR_PTR(-ENOMEM);
7342         }
7343
7344         return NULL;
7345 }
7346
7347 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7348 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7349
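/*
 * Register the enum maps found in the kernel's own
 * __start/__stop_ftrace_enum_maps section, so enum names used in
 * trace event print formats can be translated to their values.
 */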
7350 static void __init trace_enum_init(void)
7351 {
7352         int len;
7353
7354         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7355         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7356 }
7357
7358 #ifdef CONFIG_MODULES
7359 static void trace_module_add_enums(struct module *mod)
7360 {
7361         if (!mod->num_trace_enums)
7362                 return;
7363
7364         /*
7365          * Modules with bad taint do not have events created, do
7366          * not bother with enums either.
7367          */
7368         if (trace_module_has_bad_taint(mod))
7369                 return;
7370
7371         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7372 }
7373
7374 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7375 static void trace_module_remove_enums(struct module *mod)
7376 {
7377         union trace_enum_map_item *map;
7378         union trace_enum_map_item **last = &trace_enum_maps;
7379
7380         if (!mod->num_trace_enums)
7381                 return;
7382
7383         mutex_lock(&trace_enum_mutex);
7384
7385         map = trace_enum_maps;
7386
7387         while (map) {
7388                 if (map->head.mod == mod)
7389                         break;
7390                 map = trace_enum_jmp_to_tail(map);
7391                 last = &map->tail.next;
7392                 map = map->tail.next;
7393         }
7394         if (!map)
7395                 goto out;
7396
7397         *last = trace_enum_jmp_to_tail(map)->tail.next;
7398         kfree(map);
7399  out:
7400         mutex_unlock(&trace_enum_mutex);
7401 }
7402 #else
7403 static inline void trace_module_remove_enums(struct module *mod) { }
7404 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7405
7406 static int trace_module_notify(struct notifier_block *self,
7407                                unsigned long val, void *data)
7408 {
7409         struct module *mod = data;
7410
7411         switch (val) {
7412         case MODULE_STATE_COMING:
7413                 trace_module_add_enums(mod);
7414                 break;
7415         case MODULE_STATE_GOING:
7416                 trace_module_remove_enums(mod);
7417                 break;
7418         }
7419
7420         return 0;
7421 }
7422
7423 static struct notifier_block trace_module_nb = {
7424         .notifier_call = trace_module_notify,
7425         .priority = 0,
7426 };
7427 #endif /* CONFIG_MODULES */
7428
7429 static __init int tracer_init_tracefs(void)
7430 {
7431         struct dentry *d_tracer;
7432
7433         trace_access_lock_init();
7434
7435         d_tracer = tracing_init_dentry();
7436         if (IS_ERR(d_tracer))
7437                 return 0;
7438
7439         init_tracer_tracefs(&global_trace, d_tracer);
7440         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7441
7442         trace_create_file("tracing_thresh", 0644, d_tracer,
7443                         &global_trace, &tracing_thresh_fops);
7444
7445         trace_create_file("README", 0444, d_tracer,
7446                         NULL, &tracing_readme_fops);
7447
7448         trace_create_file("saved_cmdlines", 0444, d_tracer,
7449                         NULL, &tracing_saved_cmdlines_fops);
7450
7451         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7452                           NULL, &tracing_saved_cmdlines_size_fops);
7453
7454         trace_enum_init();
7455
7456         trace_create_enum_file(d_tracer);
7457
7458 #ifdef CONFIG_MODULES
7459         register_module_notifier(&trace_module_nb);
7460 #endif
7461
7462 #ifdef CONFIG_DYNAMIC_FTRACE
7463         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7464                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7465 #endif
7466
7467         create_trace_instances(d_tracer);
7468
7469         update_tracer_options(&global_trace);
7470
7471         return 0;
7472 }
7473
7474 static int trace_panic_handler(struct notifier_block *this,
7475                                unsigned long event, void *unused)
7476 {
7477         if (ftrace_dump_on_oops)
7478                 ftrace_dump(ftrace_dump_on_oops);
7479         return NOTIFY_OK;
7480 }
7481
7482 static struct notifier_block trace_panic_notifier = {
7483         .notifier_call  = trace_panic_handler,
7484         .next           = NULL,
7485         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7486 };
7487
7488 static int trace_die_handler(struct notifier_block *self,
7489                              unsigned long val,
7490                              void *data)
7491 {
7492         switch (val) {
7493         case DIE_OOPS:
7494                 if (ftrace_dump_on_oops)
7495                         ftrace_dump(ftrace_dump_on_oops);
7496                 break;
7497         default:
7498                 break;
7499         }
7500         return NOTIFY_OK;
7501 }
7502
7503 static struct notifier_block trace_die_notifier = {
7504         .notifier_call = trace_die_handler,
7505         .priority = 200
7506 };
7507
7508 /*
7509  * printk is capped at 1024 characters; we really don't need it that big.
7510  * Nothing should be printing 1000 characters anyway.
7511  */
7512 #define TRACE_MAX_PRINT         1000
7513
7514 /*
7515  * Define here KERN_TRACE so that we have one place to modify
7516  * it if we decide to change what log level the ftrace dump
7517  * should be at.
7518  */
7519 #define KERN_TRACE              KERN_EMERG
7520
7521 void
7522 trace_printk_seq(struct trace_seq *s)
7523 {
7524         /* Probably should print a warning here. */
7525         if (s->seq.len >= TRACE_MAX_PRINT)
7526                 s->seq.len = TRACE_MAX_PRINT;
7527
7528         /*
7529          * More paranoid code. Although the buffer size is set to
7530          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7531          * an extra layer of protection.
7532          */
7533         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7534                 s->seq.len = s->seq.size - 1;
7535
7536         /* should be zero ended, but we are paranoid. */
7537         s->buffer[s->seq.len] = 0;
7538
7539         printk(KERN_TRACE "%s", s->buffer);
7540
7541         trace_seq_init(s);
7542 }
7543
7544 void trace_init_global_iter(struct trace_iterator *iter)
7545 {
7546         iter->tr = &global_trace;
7547         iter->trace = iter->tr->current_trace;
7548         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7549         iter->trace_buffer = &global_trace.trace_buffer;
7550
7551         if (iter->trace && iter->trace->open)
7552                 iter->trace->open(iter);
7553
7554         /* Annotate start of buffers if we had overruns */
7555         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7556                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7557
7558         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7559         if (trace_clocks[iter->tr->clock_id].in_ns)
7560                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7561 }
7562
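/*
 * Dump the contents of the ftrace ring buffer to the console with
 * printk(KERN_EMERG). Used from the panic and die notifiers above when
 * ftrace_dump_on_oops is set, and exported for code that wants a
 * last-gasp trace dump.
 */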
7563 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7564 {
7565         /* use static because iter can be a bit big for the stack */
7566         static struct trace_iterator iter;
7567         static atomic_t dump_running;
7568         struct trace_array *tr = &global_trace;
7569         unsigned int old_userobj;
7570         unsigned long flags;
7571         int cnt = 0, cpu;
7572
7573         /* Only allow one dump user at a time. */
7574         if (atomic_inc_return(&dump_running) != 1) {
7575                 atomic_dec(&dump_running);
7576                 return;
7577         }
7578
7579         /*
7580          * Always turn off tracing when we dump.
7581          * We don't need to show trace output of what happens
7582          * between multiple crashes.
7583          *
7584          * If the user does a sysrq-z, then they can re-enable
7585          * tracing with echo 1 > tracing_on.
7586          */
7587         tracing_off();
7588
7589         local_irq_save(flags);
7590
7591         /* Simulate the iterator */
7592         trace_init_global_iter(&iter);
7593
7594         for_each_tracing_cpu(cpu) {
7595                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7596         }
7597
7598         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7599
7600         /* don't look at user memory in panic mode */
7601         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7602
7603         switch (oops_dump_mode) {
7604         case DUMP_ALL:
7605                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7606                 break;
7607         case DUMP_ORIG:
7608                 iter.cpu_file = raw_smp_processor_id();
7609                 break;
7610         case DUMP_NONE:
7611                 goto out_enable;
7612         default:
7613                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7614                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7615         }
7616
7617         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7618
7619         /* Did function tracer already get disabled? */
7620         if (ftrace_is_dead()) {
7621                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7622                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7623         }
7624
7625         /*
7626          * We need to stop all tracing on all CPUs to read
7627          * the next buffer. This is a bit expensive, but is
7628          * not done often. We fill all that we can read,
7629          * and then release the locks again.
7630          */
7631
7632         while (!trace_empty(&iter)) {
7633
7634                 if (!cnt)
7635                         printk(KERN_TRACE "---------------------------------\n");
7636
7637                 cnt++;
7638
7639                 trace_iterator_reset(&iter);
7640                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7641
7642                 if (trace_find_next_entry_inc(&iter) != NULL) {
7643                         int ret;
7644
7645                         ret = print_trace_line(&iter);
7646                         if (ret != TRACE_TYPE_NO_CONSUME)
7647                                 trace_consume(&iter);
7648                 }
7649                 touch_nmi_watchdog();
7650
7651                 trace_printk_seq(&iter.seq);
7652         }
7653
7654         if (!cnt)
7655                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7656         else
7657                 printk(KERN_TRACE "---------------------------------\n");
7658
7659  out_enable:
7660         tr->trace_flags |= old_userobj;
7661
7662         for_each_tracing_cpu(cpu) {
7663                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7664         }
7665         atomic_dec(&dump_running);
7666         local_irq_restore(flags);
7667 }
7668 EXPORT_SYMBOL_GPL(ftrace_dump);
7669
7670 __init static int tracer_alloc_buffers(void)
7671 {
7672         int ring_buf_size;
7673         int ret = -ENOMEM;
7674
7675         /*
7676          * Make sure we don't accidentally add more trace options
7677          * than we have bits for.
7678          */
7679         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7680
7681         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7682                 goto out;
7683
7684         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7685                 goto out_free_buffer_mask;
7686
7687         /* Only allocate trace_printk buffers if a trace_printk exists */
7688         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7689                 /* Must be called before global_trace.buffer is allocated */
7690                 trace_printk_init_buffers();
7691
7692         /* To save memory, keep the ring buffer size to its minimum */
7693         if (ring_buffer_expanded)
7694                 ring_buf_size = trace_buf_size;
7695         else
7696                 ring_buf_size = 1;
7697
7698         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7699         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7700
7701         raw_spin_lock_init(&global_trace.start_lock);
7702
7703         /* Used for event triggers */
7704         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7705         if (!temp_buffer)
7706                 goto out_free_cpumask;
7707
7708         if (trace_create_savedcmd() < 0)
7709                 goto out_free_temp_buffer;
7710
7711         /* TODO: make the number of buffers hot pluggable with CPUS */
7712         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7713                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7714                 WARN_ON(1);
7715                 goto out_free_savedcmd;
7716         }
7717
7718         if (global_trace.buffer_disabled)
7719                 tracing_off();
7720
7721         if (trace_boot_clock) {
7722                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7723                 if (ret < 0)
7724                         pr_warn("Trace clock %s not defined, going back to default\n",
7725                                 trace_boot_clock);
7726         }
7727
7728         /*
7729          * register_tracer() might reference current_trace, so it
7730          * needs to be set before we register anything. This is
7731          * just a bootstrap of current_trace anyway.
7732          */
7733         global_trace.current_trace = &nop_trace;
7734
7735         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7736
7737         ftrace_init_global_array_ops(&global_trace);
7738
7739         init_trace_flags_index(&global_trace);
7740
7741         register_tracer(&nop_trace);
7742
7743         /* All seems OK, enable tracing */
7744         tracing_disabled = 0;
7745
7746         atomic_notifier_chain_register(&panic_notifier_list,
7747                                        &trace_panic_notifier);
7748
7749         register_die_notifier(&trace_die_notifier);
7750
7751         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7752
7753         INIT_LIST_HEAD(&global_trace.systems);
7754         INIT_LIST_HEAD(&global_trace.events);
7755         list_add(&global_trace.list, &ftrace_trace_arrays);
7756
7757         apply_trace_boot_options();
7758
7759         register_snapshot_cmd();
7760
7761         return 0;
7762
7763 out_free_savedcmd:
7764         free_saved_cmdlines_buffer(savedcmd);
7765 out_free_temp_buffer:
7766         ring_buffer_free(temp_buffer);
7767 out_free_cpumask:
7768         free_cpumask_var(global_trace.tracing_cpumask);
7769 out_free_buffer_mask:
7770         free_cpumask_var(tracing_buffer_mask);
7771 out:
7772         return ret;
7773 }
7774
7775 void __init trace_init(void)
7776 {
7777         if (tracepoint_printk) {
7778                 tracepoint_print_iter =
7779                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7780                 if (WARN_ON(!tracepoint_print_iter))
7781                         tracepoint_printk = 0;
7782         }
7783         tracer_alloc_buffers();
7784         trace_event_init();
7785 }
7786
7787 __init static int clear_boot_tracer(void)
7788 {
7789         /*
7790          * The default bootup tracer name lives in an init section.
7791          * This function is called from a late initcall. If the boot
7792          * tracer was never registered, clear the pointer, to prevent
7793          * a later registration from accessing the buffer that is
7794          * about to be freed.
7795          */
7796         if (!default_bootup_tracer)
7797                 return 0;
7798
7799         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7800                default_bootup_tracer);
7801         default_bootup_tracer = NULL;
7802
7803         return 0;
7804 }
7805
7806 fs_initcall(tracer_init_tracefs);
7807 late_initcall_sync(clear_boot_tracer);