1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 and is set back to zero only if the
93  * initialization of the tracer is successful; nothing else ever
94  * clears it.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
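
/*
 * Illustrative layout of one saved array (an editor's sketch based on the
 * comment above, not a structure used directly by the code):
 *
 *   item[0]        head  (head.length = N, head.mod = owning module or NULL)
 *   item[1..N]     map   (the N saved trace_enum_map entries)
 *   item[N + 1]    tail  (tail.next points to the next saved array, if any)
 */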
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
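
/*
 * Example usage (illustrative note): the dump mode can be selected either
 * at boot or at run time, e.g.
 *
 *   ftrace_dump_on_oops              (boot: dump the buffers of all CPUs)
 *   ftrace_dump_on_oops=orig_cpu     (boot: dump only the CPU that oopsed)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops      (run time)
 */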
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         /* Ignore the "tp_printk_stop_on_boot" param */
232         if (*str == '_')
233                 return 0;
234
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(cycle_t nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
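
/*
 * Example (illustrative): ns2usecs(1500) returns 2, because the +500
 * rounds to the nearest microsecond before the divide by 1000.
 */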
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         TRACE_ITER_EVENT_FORK
263
264 /*
265  * The global_trace is the descriptor that holds the tracing
266  * buffers for the live tracing. For each CPU, it contains
267  * a linked list of pages that will store trace entries. The
268  * page descriptors of the pages in memory are used to hold
269  * the linked list by linking the lru item in the page descriptor
270  * of each of the pages in the per-CPU buffer.
271  *
272  * For each active CPU there is a data field that holds the
273  * pages for the buffer for that CPU. Each CPU has the same number
274  * of pages allocated for its buffer.
275  */
276 static struct trace_array global_trace = {
277         .trace_flags = TRACE_DEFAULT_FLAGS,
278 };
279
280 LIST_HEAD(ftrace_trace_arrays);
281
282 int trace_array_get(struct trace_array *this_tr)
283 {
284         struct trace_array *tr;
285         int ret = -ENODEV;
286
287         mutex_lock(&trace_types_lock);
288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
289                 if (tr == this_tr) {
290                         tr->ref++;
291                         ret = 0;
292                         break;
293                 }
294         }
295         mutex_unlock(&trace_types_lock);
296
297         return ret;
298 }
299
300 static void __trace_array_put(struct trace_array *this_tr)
301 {
302         WARN_ON(!this_tr->ref);
303         this_tr->ref--;
304 }
305
306 void trace_array_put(struct trace_array *this_tr)
307 {
308         mutex_lock(&trace_types_lock);
309         __trace_array_put(this_tr);
310         mutex_unlock(&trace_types_lock);
311 }
312
313 int call_filter_check_discard(struct trace_event_call *call, void *rec,
314                               struct ring_buffer *buffer,
315                               struct ring_buffer_event *event)
316 {
317         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
318             !filter_match_preds(call->filter, rec)) {
319                 __trace_event_discard_commit(buffer, event);
320                 return 1;
321         }
322
323         return 0;
324 }
325
326 void trace_free_pid_list(struct trace_pid_list *pid_list)
327 {
328         vfree(pid_list->pids);
329         kfree(pid_list);
330 }
331
332 /**
333  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
334  * @filtered_pids: The list of pids to check
335  * @search_pid: The PID to find in @filtered_pids
336  *
337  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
338  */
339 bool
340 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
341 {
342         /*
343          * If pid_max changed after filtered_pids was created, we
344          * by default ignore all pids greater than the previous pid_max.
345          */
346         if (search_pid >= filtered_pids->pid_max)
347                 return false;
348
349         return test_bit(search_pid, filtered_pids->pids);
350 }
351
352 /**
353  * trace_ignore_this_task - should a task be ignored for tracing
354  * @filtered_pids: The list of pids to check
355  * @task: The task that should be ignored if not filtered
356  *
357  * Checks if @task should be traced or not from @filtered_pids.
358  * Returns true if @task should *NOT* be traced.
359  * Returns false if @task should be traced.
360  */
361 bool
362 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
363 {
364         /*
365          * Return false, because if filtered_pids does not exist,
366          * all pids are good to trace.
367          */
368         if (!filtered_pids)
369                 return false;
370
371         return !trace_find_filtered_pid(filtered_pids, task->pid);
372 }
373
374 /**
375  * trace_filter_add_remove_task - Add or remove a task from a pid_list
376  * @pid_list: The list to modify
377  * @self: The current task for fork or NULL for exit
378  * @task: The task to add or remove
379  *
380  * If adding a task, if @self is defined, the task is only added if @self
381  * is also included in @pid_list. This happens on fork and tasks should
382  * only be added when the parent is listed. If @self is NULL, then the
383  * @task pid will be removed from the list, which would happen on exit
384  * of a task.
385  */
386 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
387                                   struct task_struct *self,
388                                   struct task_struct *task)
389 {
390         if (!pid_list)
391                 return;
392
393         /* For forks, we only add if the forking task is listed */
394         if (self) {
395                 if (!trace_find_filtered_pid(pid_list, self->pid))
396                         return;
397         }
398
399         /* Sorry, but we don't support pid_max changing after setting */
400         if (task->pid >= pid_list->pid_max)
401                 return;
402
403         /* "self" is set for forks, and NULL for exits */
404         if (self)
405                 set_bit(task->pid, pid_list->pids);
406         else
407                 clear_bit(task->pid, pid_list->pids);
408 }
409
410 /**
411  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
412  * @pid_list: The pid list to show
413  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
414  * @pos: The position of the file
415  *
416  * This is used by the seq_file "next" operation to iterate the pids
417  * listed in a trace_pid_list structure.
418  *
419  * Returns the pid+1 as we want to display pid of zero, but NULL would
420  * stop the iteration.
421  */
422 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
423 {
424         unsigned long pid = (unsigned long)v;
425
426         (*pos)++;
427
428         /* pid already is +1 of the actual previous bit */
429         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
430
431         /* Return pid + 1 to allow zero to be represented */
432         if (pid < pid_list->pid_max)
433                 return (void *)(pid + 1);
434
435         return NULL;
436 }
437
438 /**
439  * trace_pid_start - Used for seq_file to start reading pid lists
440  * @pid_list: The pid list to show
441  * @pos: The position of the file
442  *
443  * This is used by seq_file "start" operation to start the iteration
444  * of listing pids.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
450 {
451         unsigned long pid;
452         loff_t l = 0;
453
454         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
455         if (pid >= pid_list->pid_max)
456                 return NULL;
457
458         /* Return pid + 1 so that zero can be the exit value */
459         for (pid++; pid && l < *pos;
460              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
461                 ;
462         return (void *)pid;
463 }
464
465 /**
466  * trace_pid_show - show the current pid in seq_file processing
467  * @m: The seq_file structure to write into
468  * @v: A void pointer of the pid (+1) value to display
469  *
470  * Can be directly used by seq_file operations to display the current
471  * pid value.
472  */
473 int trace_pid_show(struct seq_file *m, void *v)
474 {
475         unsigned long pid = (unsigned long)v - 1;
476
477         seq_printf(m, "%lu\n", pid);
478         return 0;
479 }
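
/*
 * A minimal sketch of how a caller might wire the three helpers above into
 * a seq_file (the wrappers and "example_pid_list" are hypothetical; real
 * users such as the event pid filter add locking and pid_list lookup
 * around them):
 *
 *	static void *example_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *example_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static void example_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_seq_ops = {
 *		.start	= example_start,
 *		.next	= example_next,
 *		.stop	= example_stop,
 *		.show	= trace_pid_show,
 *	};
 */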
480
481 /* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
482 #define PID_BUF_SIZE            127
483
484 int trace_pid_write(struct trace_pid_list *filtered_pids,
485                     struct trace_pid_list **new_pid_list,
486                     const char __user *ubuf, size_t cnt)
487 {
488         struct trace_pid_list *pid_list;
489         struct trace_parser parser;
490         unsigned long val;
491         int nr_pids = 0;
492         ssize_t read = 0;
493         ssize_t ret = 0;
494         loff_t pos;
495         pid_t pid;
496
497         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
498                 return -ENOMEM;
499
500         /*
501  * The write is an all or nothing operation: always create a new
502  * array when the user adds new pids, rather than modifying the
503  * existing one in place. If the operation fails, the current list
504  * is not modified.
505          */
506         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
507         if (!pid_list) {
508                 trace_parser_put(&parser);
509                 return -ENOMEM;
510         }
511
512         pid_list->pid_max = READ_ONCE(pid_max);
513
514         /* Only truncating will shrink pid_max */
515         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
516                 pid_list->pid_max = filtered_pids->pid_max;
517
518         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
519         if (!pid_list->pids) {
520                 trace_parser_put(&parser);
521                 kfree(pid_list);
522                 return -ENOMEM;
523         }
524
525         if (filtered_pids) {
526                 /* copy the current bits to the new max */
527                 for_each_set_bit(pid, filtered_pids->pids,
528                                  filtered_pids->pid_max) {
529                         set_bit(pid, pid_list->pids);
530                         nr_pids++;
531                 }
532         }
533
534         while (cnt > 0) {
535
536                 pos = 0;
537
538                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
539                 if (ret < 0 || !trace_parser_loaded(&parser))
540                         break;
541
542                 read += ret;
543                 ubuf += ret;
544                 cnt -= ret;
545
546                 parser.buffer[parser.idx] = 0;
547
548                 ret = -EINVAL;
549                 if (kstrtoul(parser.buffer, 0, &val))
550                         break;
551                 if (val >= pid_list->pid_max)
552                         break;
553
554                 pid = (pid_t)val;
555
556                 set_bit(pid, pid_list->pids);
557                 nr_pids++;
558
559                 trace_parser_clear(&parser);
560                 ret = 0;
561         }
562         trace_parser_put(&parser);
563
564         if (ret < 0) {
565                 trace_free_pid_list(pid_list);
566                 return ret;
567         }
568
569         if (!nr_pids) {
570                 /* Cleared the list of pids */
571                 trace_free_pid_list(pid_list);
572                 read = ret;
573                 pid_list = NULL;
574         }
575
576         *new_pid_list = pid_list;
577
578         return read;
579 }
580
581 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
582 {
583         u64 ts;
584
585         /* Early boot up does not have a buffer yet */
586         if (!buf->buffer)
587                 return trace_clock_local();
588
589         ts = ring_buffer_time_stamp(buf->buffer, cpu);
590         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
591
592         return ts;
593 }
594
595 cycle_t ftrace_now(int cpu)
596 {
597         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
598 }
599
600 /**
601  * tracing_is_enabled - Show if global_trace has been enabled
602  *
603  * Shows if the global trace has been enabled or not. It uses the
604  * mirror flag "buffer_disabled" to be used in fast paths such as for
605  * the irqsoff tracer. But it may be inaccurate due to races. If you
606  * need to know the accurate state, use tracing_is_on() which is a little
607  * slower, but accurate.
608  */
609 int tracing_is_enabled(void)
610 {
611         /*
612          * For quick access (irqsoff uses this in fast path), just
613          * return the mirror variable of the state of the ring buffer.
614          * It's a little racy, but we don't really care.
615          */
616         smp_rmb();
617         return !global_trace.buffer_disabled;
618 }
619
620 /*
621  * trace_buf_size is the size in bytes that is allocated
622  * for a buffer. Note, the number of bytes is always rounded
623  * to page size.
624  *
625  * This number is purposely set to a low value of 16384;
626  * if a dump on oops happens, it is much appreciated not to have
627  * to wait for all that output. In any case, this is configurable
628  * at both boot time and run time.
629  */
630 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
631
632 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
633
634 /* trace_types holds a link list of available tracers. */
635 static struct tracer            *trace_types __read_mostly;
636
637 /*
638  * trace_types_lock is used to protect the trace_types list.
639  */
640 DEFINE_MUTEX(trace_types_lock);
641
642 /*
643  * serialize the access of the ring buffer
644  *
645  * The ring buffer serializes readers, but that is only low level protection.
646  * The validity of the events (returned by ring_buffer_peek() etc.)
647  * is not protected by the ring buffer.
648  *
649  * The content of events may become garbage if we allow other processes
650  * to consume these events concurrently:
651  *   A) the page of the consumed events may become a normal page
652  *      (not a reader page) in the ring buffer, and this page will be
653  *      rewritten by the events producer.
654  *   B) the page of the consumed events may become a page for splice_read,
655  *      and this page will be returned to the system.
656  *
657  * These primitives allow multiple processes to access different CPU
658  * ring buffers concurrently.
659  *
660  * These primitives do not distinguish read-only from read-consume access.
661  * Multiple read-only accesses are also serialized.
662  */
663
664 #ifdef CONFIG_SMP
665 static DECLARE_RWSEM(all_cpu_access_lock);
666 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
667
668 static inline void trace_access_lock(int cpu)
669 {
670         if (cpu == RING_BUFFER_ALL_CPUS) {
671                 /* gain it for accessing the whole ring buffer. */
672                 down_write(&all_cpu_access_lock);
673         } else {
674                 /* gain it for accessing a cpu ring buffer. */
675
676                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
677                 down_read(&all_cpu_access_lock);
678
679                 /* Secondly block other access to this @cpu ring buffer. */
680                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
681         }
682 }
683
684 static inline void trace_access_unlock(int cpu)
685 {
686         if (cpu == RING_BUFFER_ALL_CPUS) {
687                 up_write(&all_cpu_access_lock);
688         } else {
689                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
690                 up_read(&all_cpu_access_lock);
691         }
692 }
693
694 static inline void trace_access_lock_init(void)
695 {
696         int cpu;
697
698         for_each_possible_cpu(cpu)
699                 mutex_init(&per_cpu(cpu_access_lock, cpu));
700 }
701
702 #else
703
704 static DEFINE_MUTEX(access_lock);
705
706 static inline void trace_access_lock(int cpu)
707 {
708         (void)cpu;
709         mutex_lock(&access_lock);
710 }
711
712 static inline void trace_access_unlock(int cpu)
713 {
714         (void)cpu;
715         mutex_unlock(&access_lock);
716 }
717
718 static inline void trace_access_lock_init(void)
719 {
720 }
721
722 #endif
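
/*
 * Typical usage pattern (a sketch; the readers further down in this file
 * pair these calls around consuming or splicing events):
 *
 *	trace_access_lock(cpu_file);
 *	... read or consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu_file);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock exclusively, so the
 * caller can touch every per-cpu buffer at once.
 */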
723
724 #ifdef CONFIG_STACKTRACE
725 static void __ftrace_trace_stack(struct ring_buffer *buffer,
726                                  unsigned long flags,
727                                  int skip, int pc, struct pt_regs *regs);
728 static inline void ftrace_trace_stack(struct trace_array *tr,
729                                       struct ring_buffer *buffer,
730                                       unsigned long flags,
731                                       int skip, int pc, struct pt_regs *regs);
732
733 #else
734 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
735                                         unsigned long flags,
736                                         int skip, int pc, struct pt_regs *regs)
737 {
738 }
739 static inline void ftrace_trace_stack(struct trace_array *tr,
740                                       struct ring_buffer *buffer,
741                                       unsigned long flags,
742                                       int skip, int pc, struct pt_regs *regs)
743 {
744 }
745
746 #endif
747
748 static void tracer_tracing_on(struct trace_array *tr)
749 {
750         if (tr->trace_buffer.buffer)
751                 ring_buffer_record_on(tr->trace_buffer.buffer);
752         /*
753          * This flag is looked at when buffers haven't been allocated
754          * yet, or by some tracers (like irqsoff), that just want to
755          * know if the ring buffer has been disabled, but it can handle
756          * races where it gets disabled while we still do a record.
757          * As the check is in the fast path of the tracers, it is more
758          * important to be fast than accurate.
759          */
760         tr->buffer_disabled = 0;
761         /* Make the flag seen by readers */
762         smp_wmb();
763 }
764
765 /**
766  * tracing_on - enable tracing buffers
767  *
768  * This function enables tracing buffers that may have been
769  * disabled with tracing_off.
770  */
771 void tracing_on(void)
772 {
773         tracer_tracing_on(&global_trace);
774 }
775 EXPORT_SYMBOL_GPL(tracing_on);
776
777 /**
778  * __trace_puts - write a constant string into the trace buffer.
779  * @ip:    The address of the caller
780  * @str:   The constant string to write
781  * @size:  The size of the string.
782  */
783 int __trace_puts(unsigned long ip, const char *str, int size)
784 {
785         struct ring_buffer_event *event;
786         struct ring_buffer *buffer;
787         struct print_entry *entry;
788         unsigned long irq_flags;
789         int alloc;
790         int pc;
791
792         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
793                 return 0;
794
795         pc = preempt_count();
796
797         if (unlikely(tracing_selftest_running || tracing_disabled))
798                 return 0;
799
800         alloc = sizeof(*entry) + size + 2; /* possible \n added */
801
802         local_save_flags(irq_flags);
803         buffer = global_trace.trace_buffer.buffer;
804         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
805                                           irq_flags, pc);
806         if (!event)
807                 return 0;
808
809         entry = ring_buffer_event_data(event);
810         entry->ip = ip;
811
812         memcpy(&entry->buf, str, size);
813
814         /* Add a newline if necessary */
815         if (entry->buf[size - 1] != '\n') {
816                 entry->buf[size] = '\n';
817                 entry->buf[size + 1] = '\0';
818         } else
819                 entry->buf[size] = '\0';
820
821         __buffer_unlock_commit(buffer, event);
822         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
823
824         return size;
825 }
826 EXPORT_SYMBOL_GPL(__trace_puts);
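
/*
 * Callers normally reach this through the trace_puts() macro (a quick
 * debugging marker), e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The macro uses __trace_bputs() when the argument is a true constant
 * string and falls back to __trace_puts() otherwise.
 */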
827
828 /**
829  * __trace_bputs - write the pointer to a constant string into trace buffer
830  * @ip:    The address of the caller
831  * @str:   The constant string whose address is written to the buffer
832  */
833 int __trace_bputs(unsigned long ip, const char *str)
834 {
835         struct ring_buffer_event *event;
836         struct ring_buffer *buffer;
837         struct bputs_entry *entry;
838         unsigned long irq_flags;
839         int size = sizeof(struct bputs_entry);
840         int pc;
841
842         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843                 return 0;
844
845         pc = preempt_count();
846
847         if (unlikely(tracing_selftest_running || tracing_disabled))
848                 return 0;
849
850         local_save_flags(irq_flags);
851         buffer = global_trace.trace_buffer.buffer;
852         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
853                                           irq_flags, pc);
854         if (!event)
855                 return 0;
856
857         entry = ring_buffer_event_data(event);
858         entry->ip                       = ip;
859         entry->str                      = str;
860
861         __buffer_unlock_commit(buffer, event);
862         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
863
864         return 1;
865 }
866 EXPORT_SYMBOL_GPL(__trace_bputs);
867
868 #ifdef CONFIG_TRACER_SNAPSHOT
869 /**
870  * tracing_snapshot - take a snapshot of the current buffer.
871  *
872  * This causes a swap between the snapshot buffer and the current live
873  * tracing buffer. You can use this to take snapshots of the live
874  * trace when some condition is triggered, but continue to trace.
875  *
876  * Note, make sure to allocate the snapshot either with
877  * tracing_snapshot_alloc(), or manually with:
878  *   echo 1 > /sys/kernel/debug/tracing/snapshot
879  *
880  * If the snapshot buffer is not allocated, it will stop tracing.
881  * Basically making a permanent snapshot.
882  */
883 void tracing_snapshot(void)
884 {
885         struct trace_array *tr = &global_trace;
886         struct tracer *tracer = tr->current_trace;
887         unsigned long flags;
888
889         if (in_nmi()) {
890                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
891                 internal_trace_puts("*** snapshot is being ignored        ***\n");
892                 return;
893         }
894
895         if (!tr->allocated_snapshot) {
896                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
897                 internal_trace_puts("*** stopping trace here!   ***\n");
898                 tracing_off();
899                 return;
900         }
901
902         /* Note, snapshot can not be used when the tracer uses it */
903         if (tracer->use_max_tr) {
904                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
905                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
906                 return;
907         }
908
909         local_irq_save(flags);
910         update_max_tr(tr, current, smp_processor_id());
911         local_irq_restore(flags);
912 }
913 EXPORT_SYMBOL_GPL(tracing_snapshot);
914
915 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
916                                         struct trace_buffer *size_buf, int cpu_id);
917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
918
919 static int alloc_snapshot(struct trace_array *tr)
920 {
921         int ret;
922
923         if (!tr->allocated_snapshot) {
924
925                 /* allocate spare buffer */
926                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
927                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
928                 if (ret < 0)
929                         return ret;
930
931                 tr->allocated_snapshot = true;
932         }
933
934         return 0;
935 }
936
937 static void free_snapshot(struct trace_array *tr)
938 {
939         /*
940          * We don't free the ring buffer; instead, we resize it because the
941          * max_tr ring buffer has some state (e.g. ring->clock) and we want
942          * to preserve it.
943          */
944         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
945         set_buffer_entries(&tr->max_buffer, 1);
946         tracing_reset_online_cpus(&tr->max_buffer);
947         tr->allocated_snapshot = false;
948 }
949
950 /**
951  * tracing_alloc_snapshot - allocate snapshot buffer.
952  *
953  * This only allocates the snapshot buffer if it isn't already
954  * allocated - it doesn't also take a snapshot.
955  *
956  * This is meant to be used in cases where the snapshot buffer needs
957  * to be set up for events that can't sleep but need to be able to
958  * trigger a snapshot.
959  */
960 int tracing_alloc_snapshot(void)
961 {
962         struct trace_array *tr = &global_trace;
963         int ret;
964
965         ret = alloc_snapshot(tr);
966         WARN_ON(ret < 0);
967
968         return ret;
969 }
970 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
971
972 /**
973  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
974  *
975  * This is similar to tracing_snapshot(), but it will allocate the
976  * snapshot buffer if it isn't already allocated. Use this only
977  * where it is safe to sleep, as the allocation may sleep.
978  *
979  * This causes a swap between the snapshot buffer and the current live
980  * tracing buffer. You can use this to take snapshots of the live
981  * trace when some condition is triggered, but continue to trace.
982  */
983 void tracing_snapshot_alloc(void)
984 {
985         int ret;
986
987         ret = tracing_alloc_snapshot();
988         if (ret < 0)
989                 return;
990
991         tracing_snapshot();
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
994 #else
995 void tracing_snapshot(void)
996 {
997         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000 int tracing_alloc_snapshot(void)
1001 {
1002         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1003         return -ENODEV;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1006 void tracing_snapshot_alloc(void)
1007 {
1008         /* Give warning */
1009         tracing_snapshot();
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1012 #endif /* CONFIG_TRACER_SNAPSHOT */
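
/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 * code that wants to capture the live trace when it detects a rare
 * condition can allocate the snapshot buffer once from a context that
 * may sleep, and then trigger snapshots from the hot path:
 *
 *	static int __init example_init(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void example_hit_condition(void)
 *	{
 *		tracing_snapshot();
 *	}
 */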
1013
1014 static void tracer_tracing_off(struct trace_array *tr)
1015 {
1016         if (tr->trace_buffer.buffer)
1017                 ring_buffer_record_off(tr->trace_buffer.buffer);
1018         /*
1019          * This flag is looked at when buffers haven't been allocated
1020          * yet, or by some tracers (like irqsoff), that just want to
1021          * know if the ring buffer has been disabled, but it can handle
1022          * races where it gets disabled while we still do a record.
1023          * As the check is in the fast path of the tracers, it is more
1024          * important to be fast than accurate.
1025          */
1026         tr->buffer_disabled = 1;
1027         /* Make the flag seen by readers */
1028         smp_wmb();
1029 }
1030
1031 /**
1032  * tracing_off - turn off tracing buffers
1033  *
1034  * This function stops the tracing buffers from recording data.
1035  * It does not disable any overhead the tracers themselves may
1036  * be causing. This function simply causes all recording to
1037  * the ring buffers to fail.
1038  */
1039 void tracing_off(void)
1040 {
1041         tracer_tracing_off(&global_trace);
1042 }
1043 EXPORT_SYMBOL_GPL(tracing_off);
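
/*
 * Typical pattern (sketch): stop the ring buffers the moment a problem
 * is detected, so that the events leading up to it are preserved:
 *
 *	if (unlikely(something_went_wrong))
 *		tracing_off();
 *
 * Tracing can be re-enabled with tracing_on() or by writing 1 to the
 * "tracing_on" file in the tracing directory.
 */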
1044
1045 void disable_trace_on_warning(void)
1046 {
1047         if (__disable_trace_on_warning)
1048                 tracing_off();
1049 }
1050
1051 /**
1052  * tracer_tracing_is_on - show the real state of the ring buffer
1053  * @tr: the trace array whose ring buffer state is queried
1054  *
1055  * Shows the real state of the ring buffer: whether it is enabled or not.
1056  */
1057 int tracer_tracing_is_on(struct trace_array *tr)
1058 {
1059         if (tr->trace_buffer.buffer)
1060                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1061         return !tr->buffer_disabled;
1062 }
1063
1064 /**
1065  * tracing_is_on - show state of ring buffers enabled
1066  */
1067 int tracing_is_on(void)
1068 {
1069         return tracer_tracing_is_on(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_is_on);
1072
1073 static int __init set_buf_size(char *str)
1074 {
1075         unsigned long buf_size;
1076
1077         if (!str)
1078                 return 0;
1079         buf_size = memparse(str, &str);
1080         /* nr_entries can not be zero */
1081         if (buf_size == 0)
1082                 return 0;
1083         trace_buf_size = buf_size;
1084         return 1;
1085 }
1086 __setup("trace_buf_size=", set_buf_size);
1087
1088 static int __init set_tracing_thresh(char *str)
1089 {
1090         unsigned long threshold;
1091         int ret;
1092
1093         if (!str)
1094                 return 0;
1095         ret = kstrtoul(str, 0, &threshold);
1096         if (ret < 0)
1097                 return 0;
1098         tracing_thresh = threshold * 1000;
1099         return 1;
1100 }
1101 __setup("tracing_thresh=", set_tracing_thresh);
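
/*
 * Note (illustrative): the value given on the command line is in
 * microseconds, e.g. "tracing_thresh=100" sets a threshold of 100
 * microseconds, stored internally as 100000 nanoseconds.
 */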
1102
1103 unsigned long nsecs_to_usecs(unsigned long nsecs)
1104 {
1105         return nsecs / 1000;
1106 }
1107
1108 /*
1109  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1110  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1111  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1112  * of strings in the order that the enums were defined.
1113  */
1114 #undef C
1115 #define C(a, b) b
1116
1117 /* These must match the bit positions in trace_iterator_flags */
1118 static const char *trace_options[] = {
1119         TRACE_FLAGS
1120         NULL
1121 };
1122
1123 static struct {
1124         u64 (*func)(void);
1125         const char *name;
1126         int in_ns;              /* is this clock in nanoseconds? */
1127 } trace_clocks[] = {
1128         { trace_clock_local,            "local",        1 },
1129         { trace_clock_global,           "global",       1 },
1130         { trace_clock_counter,          "counter",      0 },
1131         { trace_clock_jiffies,          "uptime",       0 },
1132         { trace_clock,                  "perf",         1 },
1133         { ktime_get_mono_fast_ns,       "mono",         1 },
1134         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1135         ARCH_TRACE_CLOCKS
1136 };
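
/*
 * Note (illustrative): the active clock is chosen either at boot via the
 * "trace_clock=" parameter handled by set_trace_boot_clock() above, or at
 * run time through the "trace_clock" file in the tracing directory.
 */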
1137
1138 /*
1139  * trace_parser_get_init - allocate the buffer for the trace parser
1140  */
1141 int trace_parser_get_init(struct trace_parser *parser, int size)
1142 {
1143         memset(parser, 0, sizeof(*parser));
1144
1145         parser->buffer = kmalloc(size, GFP_KERNEL);
1146         if (!parser->buffer)
1147                 return 1;
1148
1149         parser->size = size;
1150         return 0;
1151 }
1152
1153 /*
1154  * trace_parser_put - free the buffer for the trace parser
1155  */
1156 void trace_parser_put(struct trace_parser *parser)
1157 {
1158         kfree(parser->buffer);
1159 }
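
/*
 * Parser lifecycle sketch (hypothetical caller; trace_pid_write() earlier
 * in this file is a real user of this pattern):
 *
 *	struct trace_parser parser;
 *	loff_t pos;
 *
 *	if (trace_parser_get_init(&parser, 64))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... use the token in parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */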
1160
1161 /*
1162  * trace_get_user - reads the user input string separated by space
1163  * (matched by isspace(ch))
1164  *
1165  * For each string found the 'struct trace_parser' is updated,
1166  * and the function returns.
1167  *
1168  * Returns number of bytes read.
1169  *
1170  * See kernel/trace/trace.h for 'struct trace_parser' details.
1171  */
1172 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1173         size_t cnt, loff_t *ppos)
1174 {
1175         char ch;
1176         size_t read = 0;
1177         ssize_t ret;
1178
1179         if (!*ppos)
1180                 trace_parser_clear(parser);
1181
1182         ret = get_user(ch, ubuf++);
1183         if (ret)
1184                 goto out;
1185
1186         read++;
1187         cnt--;
1188
1189         /*
1190          * If the parser is not finished with the last write,
1191          * continue reading the user input without skipping spaces.
1192          */
1193         if (!parser->cont) {
1194                 /* skip white space */
1195                 while (cnt && isspace(ch)) {
1196                         ret = get_user(ch, ubuf++);
1197                         if (ret)
1198                                 goto out;
1199                         read++;
1200                         cnt--;
1201                 }
1202
1203                 /* only spaces were written */
1204                 if (isspace(ch)) {
1205                         *ppos += read;
1206                         ret = read;
1207                         goto out;
1208                 }
1209
1210                 parser->idx = 0;
1211         }
1212
1213         /* read the non-space input */
1214         while (cnt && !isspace(ch)) {
1215                 if (parser->idx < parser->size - 1)
1216                         parser->buffer[parser->idx++] = ch;
1217                 else {
1218                         ret = -EINVAL;
1219                         goto out;
1220                 }
1221                 ret = get_user(ch, ubuf++);
1222                 if (ret)
1223                         goto out;
1224                 read++;
1225                 cnt--;
1226         }
1227
1228         /* We either got finished input or we have to wait for another call. */
1229         if (isspace(ch)) {
1230                 parser->buffer[parser->idx] = 0;
1231                 parser->cont = false;
1232         } else if (parser->idx < parser->size - 1) {
1233                 parser->cont = true;
1234                 parser->buffer[parser->idx++] = ch;
1235         } else {
1236                 ret = -EINVAL;
1237                 goto out;
1238         }
1239
1240         *ppos += read;
1241         ret = read;
1242
1243 out:
1244         return ret;
1245 }
1246
1247 /* TODO add a seq_buf_to_buffer() */
1248 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1249 {
1250         int len;
1251
1252         if (trace_seq_used(s) <= s->seq.readpos)
1253                 return -EBUSY;
1254
1255         len = trace_seq_used(s) - s->seq.readpos;
1256         if (cnt > len)
1257                 cnt = len;
1258         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1259
1260         s->seq.readpos += cnt;
1261         return cnt;
1262 }
1263
1264 unsigned long __read_mostly     tracing_thresh;
1265
1266 #ifdef CONFIG_TRACER_MAX_TRACE
1267 /*
1268  * Copy the new maximum trace into the separate maximum-trace
1269  * structure. (this way the maximum trace is permanently saved,
1270  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1271  */
1272 static void
1273 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1274 {
1275         struct trace_buffer *trace_buf = &tr->trace_buffer;
1276         struct trace_buffer *max_buf = &tr->max_buffer;
1277         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1278         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1279
1280         max_buf->cpu = cpu;
1281         max_buf->time_start = data->preempt_timestamp;
1282
1283         max_data->saved_latency = tr->max_latency;
1284         max_data->critical_start = data->critical_start;
1285         max_data->critical_end = data->critical_end;
1286
1287         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1288         max_data->pid = tsk->pid;
1289         /*
1290          * If tsk == current, then use current_uid(), as that does not use
1291          * RCU. The irq tracer can be called out of RCU scope.
1292          */
1293         if (tsk == current)
1294                 max_data->uid = current_uid();
1295         else
1296                 max_data->uid = task_uid(tsk);
1297
1298         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1299         max_data->policy = tsk->policy;
1300         max_data->rt_priority = tsk->rt_priority;
1301
1302         /* record this tasks comm */
1303         tracing_record_cmdline(tsk);
1304 }
1305
1306 /**
1307  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1308  * @tr: tracer
1309  * @tsk: the task with the latency
1310  * @cpu: The cpu that initiated the trace.
1311  *
1312  * Flip the buffers between the @tr and the max_tr and record information
1313  * about which task was the cause of this latency.
1314  */
1315 void
1316 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1317 {
1318         struct ring_buffer *buf;
1319
1320         if (tr->stop_count)
1321                 return;
1322
1323         WARN_ON_ONCE(!irqs_disabled());
1324
1325         if (!tr->allocated_snapshot) {
1326                 /* Only the nop tracer should hit this when disabling */
1327                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1328                 return;
1329         }
1330
1331         arch_spin_lock(&tr->max_lock);
1332
1333         /* Inherit the recordable setting from trace_buffer */
1334         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1335                 ring_buffer_record_on(tr->max_buffer.buffer);
1336         else
1337                 ring_buffer_record_off(tr->max_buffer.buffer);
1338
1339         buf = tr->trace_buffer.buffer;
1340         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1341         tr->max_buffer.buffer = buf;
1342
1343         __update_max_tr(tr, tsk, cpu);
1344         arch_spin_unlock(&tr->max_lock);
1345 }
1346
1347 /**
1348  * update_max_tr_single - only copy one trace over, and reset the rest
1349  * @tr: tracer
1350  * @tsk: task with the latency
1351  * @cpu: the cpu of the buffer to copy.
1352  *
1353  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1354  */
1355 void
1356 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1357 {
1358         int ret;
1359
1360         if (tr->stop_count)
1361                 return;
1362
1363         WARN_ON_ONCE(!irqs_disabled());
1364         if (!tr->allocated_snapshot) {
1365                 /* Only the nop tracer should hit this when disabling */
1366                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1367                 return;
1368         }
1369
1370         arch_spin_lock(&tr->max_lock);
1371
1372         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1373
1374         if (ret == -EBUSY) {
1375                 /*
1376                  * We failed to swap the buffer due to a commit taking
1377                  * place on this CPU. We fail to record, but we reset
1378                  * the max trace buffer (no one writes directly to it)
1379                  * and flag that it failed.
1380                  */
1381                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1382                         "Failed to swap buffers due to commit in progress\n");
1383         }
1384
1385         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1386
1387         __update_max_tr(tr, tsk, cpu);
1388         arch_spin_unlock(&tr->max_lock);
1389 }
1390 #endif /* CONFIG_TRACER_MAX_TRACE */
1391
1392 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1393 {
1394         /* Iterators are static, they should be filled or empty */
1395         if (trace_buffer_iter(iter, iter->cpu_file))
1396                 return 0;
1397
1398         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1399                                 full);
1400 }
1401
1402 #ifdef CONFIG_FTRACE_STARTUP_TEST
1403 static int run_tracer_selftest(struct tracer *type)
1404 {
1405         struct trace_array *tr = &global_trace;
1406         struct tracer *saved_tracer = tr->current_trace;
1407         int ret;
1408
1409         if (!type->selftest || tracing_selftest_disabled)
1410                 return 0;
1411
1412         /*
1413          * Run a selftest on this tracer.
1414          * Here we reset the trace buffer, and set the current
1415          * tracer to be this tracer. The tracer can then run some
1416          * internal tracing to verify that everything is in order.
1417          * If we fail, we do not register this tracer.
1418          */
1419         tracing_reset_online_cpus(&tr->trace_buffer);
1420
1421         tr->current_trace = type;
1422
1423 #ifdef CONFIG_TRACER_MAX_TRACE
1424         if (type->use_max_tr) {
1425                 /* If we expanded the buffers, make sure the max is expanded too */
1426                 if (ring_buffer_expanded)
1427                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1428                                            RING_BUFFER_ALL_CPUS);
1429                 tr->allocated_snapshot = true;
1430         }
1431 #endif
1432
1433         /* the test is responsible for initializing and enabling */
1434         pr_info("Testing tracer %s: ", type->name);
1435         ret = type->selftest(type, tr);
1436         /* the test is responsible for resetting too */
1437         tr->current_trace = saved_tracer;
1438         if (ret) {
1439                 printk(KERN_CONT "FAILED!\n");
1440                 /* Add the warning after printing 'FAILED' */
1441                 WARN_ON(1);
1442                 return -1;
1443         }
1444         /* Only reset on passing, to avoid touching corrupted buffers */
1445         tracing_reset_online_cpus(&tr->trace_buffer);
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         if (type->use_max_tr) {
1449                 tr->allocated_snapshot = false;
1450
1451                 /* Shrink the max buffer again */
1452                 if (ring_buffer_expanded)
1453                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1454                                            RING_BUFFER_ALL_CPUS);
1455         }
1456 #endif
1457
1458         printk(KERN_CONT "PASSED\n");
1459         return 0;
1460 }
1461 #else
1462 static inline int run_tracer_selftest(struct tracer *type)
1463 {
1464         return 0;
1465 }
1466 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1467
1468 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1469
1470 static void __init apply_trace_boot_options(void);
1471
1472 /**
1473  * register_tracer - register a tracer with the ftrace system.
1474  * @type - the plugin for the tracer
1475  *
1476  * Register a new plugin tracer.
1477  */
1478 int __init register_tracer(struct tracer *type)
1479 {
1480         struct tracer *t;
1481         int ret = 0;
1482
1483         if (!type->name) {
1484                 pr_info("Tracer must have a name\n");
1485                 return -1;
1486         }
1487
1488         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1489                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1490                 return -1;
1491         }
1492
1493         mutex_lock(&trace_types_lock);
1494
1495         tracing_selftest_running = true;
1496
1497         for (t = trace_types; t; t = t->next) {
1498                 if (strcmp(type->name, t->name) == 0) {
1499                         /* already found */
1500                         pr_info("Tracer %s already registered\n",
1501                                 type->name);
1502                         ret = -1;
1503                         goto out;
1504                 }
1505         }
1506
1507         if (!type->set_flag)
1508                 type->set_flag = &dummy_set_flag;
1509         if (!type->flags) {
1510                 /* allocate a dummy tracer_flags */
1511                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1512                 if (!type->flags) {
1513                         ret = -ENOMEM;
1514                         goto out;
1515                 }
1516                 type->flags->val = 0;
1517                 type->flags->opts = dummy_tracer_opt;
1518         } else
1519                 if (!type->flags->opts)
1520                         type->flags->opts = dummy_tracer_opt;
1521
1522         /* store the tracer for __set_tracer_option */
1523         type->flags->trace = type;
1524
1525         ret = run_tracer_selftest(type);
1526         if (ret < 0)
1527                 goto out;
1528
1529         type->next = trace_types;
1530         trace_types = type;
1531         add_tracer_options(&global_trace, type);
1532
1533  out:
1534         tracing_selftest_running = false;
1535         mutex_unlock(&trace_types_lock);
1536
1537         if (ret || !default_bootup_tracer)
1538                 goto out_unlock;
1539
1540         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1541                 goto out_unlock;
1542
1543         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1544         /* Do we want this tracer to start on bootup? */
1545         tracing_set_tracer(&global_trace, type->name);
1546         default_bootup_tracer = NULL;
1547
1548         apply_trace_boot_options();
1549
1550         /* Disable other selftests, since this will break them. */
1551         tracing_selftest_disabled = true;
1552 #ifdef CONFIG_FTRACE_STARTUP_TEST
1553         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1554                type->name);
1555 #endif
1556
1557  out_unlock:
1558         return ret;
1559 }
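
/*
 * A minimal registration sketch (hypothetical tracer, for illustration
 * only; real tracers live in their own files and fill in many more
 * struct tracer callbacks):
 *
 *	static struct tracer example_tracer = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */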
1560
1561 void tracing_reset(struct trace_buffer *buf, int cpu)
1562 {
1563         struct ring_buffer *buffer = buf->buffer;
1564
1565         if (!buffer)
1566                 return;
1567
1568         ring_buffer_record_disable(buffer);
1569
1570         /* Make sure all commits have finished */
1571         synchronize_sched();
1572         ring_buffer_reset_cpu(buffer, cpu);
1573
1574         ring_buffer_record_enable(buffer);
1575 }
1576
1577 void tracing_reset_online_cpus(struct trace_buffer *buf)
1578 {
1579         struct ring_buffer *buffer = buf->buffer;
1580         int cpu;
1581
1582         if (!buffer)
1583                 return;
1584
1585         ring_buffer_record_disable(buffer);
1586
1587         /* Make sure all commits have finished */
1588         synchronize_sched();
1589
1590         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1591
1592         for_each_online_cpu(cpu)
1593                 ring_buffer_reset_cpu(buffer, cpu);
1594
1595         ring_buffer_record_enable(buffer);
1596 }
1597
1598 /* Must have trace_types_lock held */
1599 void tracing_reset_all_online_cpus(void)
1600 {
1601         struct trace_array *tr;
1602
1603         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1604                 tracing_reset_online_cpus(&tr->trace_buffer);
1605 #ifdef CONFIG_TRACER_MAX_TRACE
1606                 tracing_reset_online_cpus(&tr->max_buffer);
1607 #endif
1608         }
1609 }
1610
1611 #define SAVED_CMDLINES_DEFAULT 128
1612 #define NO_CMDLINE_MAP UINT_MAX
1613 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1614 struct saved_cmdlines_buffer {
1615         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1616         unsigned *map_cmdline_to_pid;
1617         unsigned cmdline_num;
1618         int cmdline_idx;
1619         char *saved_cmdlines;
1620 };
1621 static struct saved_cmdlines_buffer *savedcmd;
1622
1623 static inline char *get_saved_cmdlines(int idx)
1624 {
1625         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1626 }
1627
1628 static inline void set_cmdline(int idx, const char *cmdline)
1629 {
1630         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1631 }
1632
1633 static int allocate_cmdlines_buffer(unsigned int val,
1634                                     struct saved_cmdlines_buffer *s)
1635 {
1636         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1637                                         GFP_KERNEL);
1638         if (!s->map_cmdline_to_pid)
1639                 return -ENOMEM;
1640
1641         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1642         if (!s->saved_cmdlines) {
1643                 kfree(s->map_cmdline_to_pid);
1644                 return -ENOMEM;
1645         }
1646
1647         s->cmdline_idx = 0;
1648         s->cmdline_num = val;
1649         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1650                sizeof(s->map_pid_to_cmdline));
1651         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1652                val * sizeof(*s->map_cmdline_to_pid));
1653
1654         return 0;
1655 }
1656
1657 static int trace_create_savedcmd(void)
1658 {
1659         int ret;
1660
1661         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1662         if (!savedcmd)
1663                 return -ENOMEM;
1664
1665         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1666         if (ret < 0) {
1667                 kfree(savedcmd);
1668                 savedcmd = NULL;
1669                 return -ENOMEM;
1670         }
1671
1672         return 0;
1673 }
1674
1675 int is_tracing_stopped(void)
1676 {
1677         return global_trace.stop_count;
1678 }
1679
1680 /**
1681  * tracing_start - quick start of the tracer
1682  *
1683  * If tracing is enabled but was stopped by tracing_stop,
1684  * this will start the tracer back up.
1685  */
1686 void tracing_start(void)
1687 {
1688         struct ring_buffer *buffer;
1689         unsigned long flags;
1690
1691         if (tracing_disabled)
1692                 return;
1693
1694         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1695         if (--global_trace.stop_count) {
1696                 if (global_trace.stop_count < 0) {
1697                         /* Someone screwed up their debugging */
1698                         WARN_ON_ONCE(1);
1699                         global_trace.stop_count = 0;
1700                 }
1701                 goto out;
1702         }
1703
1704         /* Prevent the buffers from switching */
1705         arch_spin_lock(&global_trace.max_lock);
1706
1707         buffer = global_trace.trace_buffer.buffer;
1708         if (buffer)
1709                 ring_buffer_record_enable(buffer);
1710
1711 #ifdef CONFIG_TRACER_MAX_TRACE
1712         buffer = global_trace.max_buffer.buffer;
1713         if (buffer)
1714                 ring_buffer_record_enable(buffer);
1715 #endif
1716
1717         arch_spin_unlock(&global_trace.max_lock);
1718
1719  out:
1720         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1721 }
1722
1723 static void tracing_start_tr(struct trace_array *tr)
1724 {
1725         struct ring_buffer *buffer;
1726         unsigned long flags;
1727
1728         if (tracing_disabled)
1729                 return;
1730
1731         /* If global, we need to also start the max tracer */
1732         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1733                 return tracing_start();
1734
1735         raw_spin_lock_irqsave(&tr->start_lock, flags);
1736
1737         if (--tr->stop_count) {
1738                 if (tr->stop_count < 0) {
1739                         /* Someone screwed up their debugging */
1740                         WARN_ON_ONCE(1);
1741                         tr->stop_count = 0;
1742                 }
1743                 goto out;
1744         }
1745
1746         buffer = tr->trace_buffer.buffer;
1747         if (buffer)
1748                 ring_buffer_record_enable(buffer);
1749
1750  out:
1751         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1752 }
1753
1754 /**
1755  * tracing_stop - quick stop of the tracer
1756  *
1757  * Light weight way to stop tracing. Use in conjunction with
1758  * tracing_start.
1759  */
1760 void tracing_stop(void)
1761 {
1762         struct ring_buffer *buffer;
1763         unsigned long flags;
1764
1765         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1766         if (global_trace.stop_count++)
1767                 goto out;
1768
1769         /* Prevent the buffers from switching */
1770         arch_spin_lock(&global_trace.max_lock);
1771
1772         buffer = global_trace.trace_buffer.buffer;
1773         if (buffer)
1774                 ring_buffer_record_disable(buffer);
1775
1776 #ifdef CONFIG_TRACER_MAX_TRACE
1777         buffer = global_trace.max_buffer.buffer;
1778         if (buffer)
1779                 ring_buffer_record_disable(buffer);
1780 #endif
1781
1782         arch_spin_unlock(&global_trace.max_lock);
1783
1784  out:
1785         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1786 }
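
/*
 * Illustrative sketch, not part of the original source: how tracing_stop()
 * and tracing_start() are meant to be paired, per the comments above. The
 * stop/start counts nest, so only the outermost tracing_start() re-enables
 * recording. example_freeze_buffers() is a hypothetical name and the block
 * is compiled out with #if 0.
 */
#if 0
static void example_freeze_buffers(void)
{
        /* Quiesce the ring buffers so their contents stay put. */
        tracing_stop();

        /* ... read or inspect the now-stable buffers here ... */

        /* Resume recording; balanced with the tracing_stop() above. */
        tracing_start();
}
#endif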
1787
1788 static void tracing_stop_tr(struct trace_array *tr)
1789 {
1790         struct ring_buffer *buffer;
1791         unsigned long flags;
1792
1793         /* If global, we need to also stop the max tracer */
1794         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1795                 return tracing_stop();
1796
1797         raw_spin_lock_irqsave(&tr->start_lock, flags);
1798         if (tr->stop_count++)
1799                 goto out;
1800
1801         buffer = tr->trace_buffer.buffer;
1802         if (buffer)
1803                 ring_buffer_record_disable(buffer);
1804
1805  out:
1806         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1807 }
1808
1809 void trace_stop_cmdline_recording(void);
1810
1811 static int trace_save_cmdline(struct task_struct *tsk)
1812 {
1813         unsigned tpid, idx;
1814
1815         /* treat recording of idle task as a success */
1816         if (!tsk->pid)
1817                 return 1;
1818
1819         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1820
1821         /*
1822          * It's not the end of the world if we don't get
1823          * the lock, but we also don't want to spin
1824          * nor do we want to disable interrupts,
1825          * so if we miss here, then better luck next time.
1826          */
1827         if (!arch_spin_trylock(&trace_cmdline_lock))
1828                 return 0;
1829
1830         idx = savedcmd->map_pid_to_cmdline[tpid];
1831         if (idx == NO_CMDLINE_MAP) {
1832                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1833
1834                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1835                 savedcmd->cmdline_idx = idx;
1836         }
1837
1838         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1839         set_cmdline(idx, tsk->comm);
1840
1841         arch_spin_unlock(&trace_cmdline_lock);
1842
1843         return 1;
1844 }
1845
1846 static void __trace_find_cmdline(int pid, char comm[])
1847 {
1848         unsigned map;
1849         int tpid;
1850
1851         if (!pid) {
1852                 strcpy(comm, "<idle>");
1853                 return;
1854         }
1855
1856         if (WARN_ON_ONCE(pid < 0)) {
1857                 strcpy(comm, "<XXX>");
1858                 return;
1859         }
1860
1861         tpid = pid & (PID_MAX_DEFAULT - 1);
1862         map = savedcmd->map_pid_to_cmdline[tpid];
1863         if (map != NO_CMDLINE_MAP) {
1864                 tpid = savedcmd->map_cmdline_to_pid[map];
1865                 if (tpid == pid) {
1866                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1867                         return;
1868                 }
1869         }
1870         strcpy(comm, "<...>");
1871 }
1872
1873 void trace_find_cmdline(int pid, char comm[])
1874 {
1875         preempt_disable();
1876         arch_spin_lock(&trace_cmdline_lock);
1877
1878         __trace_find_cmdline(pid, comm);
1879
1880         arch_spin_unlock(&trace_cmdline_lock);
1881         preempt_enable();
1882 }
1883
1884 void tracing_record_cmdline(struct task_struct *tsk)
1885 {
1886         if (!__this_cpu_read(trace_cmdline_save))
1887                 return;
1888
1889         if (trace_save_cmdline(tsk))
1890                 __this_cpu_write(trace_cmdline_save, false);
1891 }
1892
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895                              int pc)
1896 {
1897         struct task_struct *tsk = current;
1898
1899         entry->preempt_count            = pc & 0xff;
1900         entry->pid                      = (tsk) ? tsk->pid : 0;
1901         entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905                 TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1910                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917                   int type, unsigned long flags, int pc)
1918 {
1919         struct trace_entry *ent = ring_buffer_event_data(event);
1920
1921         tracing_generic_entry_update(ent, flags, pc);
1922         ent->type = type;
1923 }
1924
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927                           int type,
1928                           unsigned long len,
1929                           unsigned long flags, int pc)
1930 {
1931         struct ring_buffer_event *event;
1932
1933         event = ring_buffer_lock_reserve(buffer, len);
1934         if (event != NULL)
1935                 trace_event_setup(event, type, flags, pc);
1936
1937         return event;
1938 }
1939
1940 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942 static int trace_buffered_event_ref;
1943
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to use a temporary
1948  * buffer to write the event data into if there's a likely chance
1949  * that it will not be committed. The discard of the ring buffer
1950  * is not as fast as committing, and is much slower than copying
1951  * a commit.
1952  *
1953  * When an event is to be filtered, allocate per cpu buffers to
1954  * write the event data into. If the event is filtered and discarded,
1955  * it is simply dropped; otherwise, the entire data is committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960         struct ring_buffer_event *event;
1961         struct page *page;
1962         int cpu;
1963
1964         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965
1966         if (trace_buffered_event_ref++)
1967                 return;
1968
1969         for_each_tracing_cpu(cpu) {
1970                 page = alloc_pages_node(cpu_to_node(cpu),
1971                                         GFP_KERNEL | __GFP_NORETRY, 0);
1972                 if (!page)
1973                         goto failed;
1974
1975                 event = page_address(page);
1976                 memset(event, 0, sizeof(*event));
1977
1978                 per_cpu(trace_buffered_event, cpu) = event;
1979
1980                 preempt_disable();
1981                 if (cpu == smp_processor_id() &&
1982                     this_cpu_read(trace_buffered_event) !=
1983                     per_cpu(trace_buffered_event, cpu))
1984                         WARN_ON_ONCE(1);
1985                 preempt_enable();
1986         }
1987
1988         return;
1989  failed:
1990         trace_buffered_event_disable();
1991 }
1992
1993 static void enable_trace_buffered_event(void *data)
1994 {
1995         /* Probably not needed, but do it anyway */
1996         smp_rmb();
1997         this_cpu_dec(trace_buffered_event_cnt);
1998 }
1999
2000 static void disable_trace_buffered_event(void *data)
2001 {
2002         this_cpu_inc(trace_buffered_event_cnt);
2003 }
2004
2005 /**
2006  * trace_buffered_event_disable - disable buffering events
2007  *
2008  * When a filter is removed, it is faster to not use the buffered
2009  * events, and to commit directly into the ring buffer. Free up
2010  * the temp buffers when there are no more users. This requires
2011  * special synchronization with current events.
2012  */
2013 void trace_buffered_event_disable(void)
2014 {
2015         int cpu;
2016
2017         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018
2019         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020                 return;
2021
2022         if (--trace_buffered_event_ref)
2023                 return;
2024
2025         preempt_disable();
2026         /* For each CPU, set the buffer as used. */
2027         smp_call_function_many(tracing_buffer_mask,
2028                                disable_trace_buffered_event, NULL, 1);
2029         preempt_enable();
2030
2031         /* Wait for all current users to finish */
2032         synchronize_sched();
2033
2034         for_each_tracing_cpu(cpu) {
2035                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036                 per_cpu(trace_buffered_event, cpu) = NULL;
2037         }
2038         /*
2039          * Make sure trace_buffered_event is NULL before clearing
2040          * trace_buffered_event_cnt.
2041          */
2042         smp_wmb();
2043
2044         preempt_disable();
2045         /* Do the work on each cpu */
2046         smp_call_function_many(tracing_buffer_mask,
2047                                enable_trace_buffered_event, NULL, 1);
2048         preempt_enable();
2049 }
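
/*
 * Illustrative sketch, not part of the original source: the intended
 * pairing of trace_buffered_event_enable()/trace_buffered_event_disable()
 * around a filter's lifetime, as described in the comments above. Both
 * calls must be made with event_mutex held (see the WARN_ON_ONCE checks).
 * example_filter_lifetime() is a hypothetical name and the block is
 * compiled out with #if 0.
 */
#if 0
static void example_filter_lifetime(void)
{
        mutex_lock(&event_mutex);
        /* A filter is being attached: buffer events that may be discarded. */
        trace_buffered_event_enable();
        /* ... install the filter on the trace event file ... */
        mutex_unlock(&event_mutex);

        /* ... much later, when the filter is removed ... */

        mutex_lock(&event_mutex);
        /* ... remove the filter ... */
        /* Last user gone: free the per-cpu pages, commit directly again. */
        trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}
#endif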
2050
2051 void
2052 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053 {
2054         __this_cpu_write(trace_cmdline_save, true);
2055
2056         /* If this is the temp buffer, we need to commit fully */
2057         if (this_cpu_read(trace_buffered_event) == event) {
2058                 /* Length is in event->array[0] */
2059                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060                 /* Release the temp buffer */
2061                 this_cpu_dec(trace_buffered_event_cnt);
2062         } else
2063                 ring_buffer_unlock_commit(buffer, event);
2064 }
2065
2066 static struct ring_buffer *temp_buffer;
2067
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070                           struct trace_event_file *trace_file,
2071                           int type, unsigned long len,
2072                           unsigned long flags, int pc)
2073 {
2074         struct ring_buffer_event *entry;
2075         int val;
2076
2077         *current_rb = trace_file->tr->trace_buffer.buffer;
2078
2079         if ((trace_file->flags &
2080              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081             (entry = this_cpu_read(trace_buffered_event))) {
2082                 /* Try to use the per cpu buffer first */
2083                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2084                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2085                         trace_event_setup(entry, type, flags, pc);
2086                         entry->array[0] = len;
2087                         return entry;
2088                 }
2089                 this_cpu_dec(trace_buffered_event_cnt);
2090         }
2091
2092         entry = trace_buffer_lock_reserve(*current_rb,
2093                                          type, len, flags, pc);
2094         /*
2095          * If tracing is off, but we have triggers enabled,
2096          * we still need to look at the event data. Use the temp_buffer
2097          * to store the trace event for the trigger to use. It's recursion
2098          * safe and will not be recorded anywhere.
2099          */
2100         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101                 *current_rb = temp_buffer;
2102                 entry = trace_buffer_lock_reserve(*current_rb,
2103                                                   type, len, flags, pc);
2104         }
2105         return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
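
/*
 * Illustrative sketch, not part of the original source: the reserve ->
 * fill -> commit flow that trace_event_buffer_lock_reserve() supports.
 * The returned event is either a real ring-buffer event or the per-cpu
 * buffered event (with the length stashed in array[0]); in both cases
 * __buffer_unlock_commit() does the right thing. Real callers typically
 * commit via event_trigger_unlock_commit(), which ends up here.
 * struct example_entry, TRACE_EXAMPLE and example_emit() are hypothetical;
 * the block is compiled out with #if 0.
 */
#if 0
struct example_entry {
        struct trace_entry      ent;
        unsigned long           value;
};

static void example_emit(struct trace_event_file *file,
                         unsigned long flags, int pc)
{
        struct ring_buffer *buffer;
        struct ring_buffer_event *event;
        struct example_entry *entry;

        event = trace_event_buffer_lock_reserve(&buffer, file, TRACE_EXAMPLE,
                                                sizeof(*entry), flags, pc);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->value = 42;

        /* Commits for real, or copies the per-cpu temp event into the buffer. */
        __buffer_unlock_commit(buffer, event);
}
#endif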
2108
2109 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110                                      struct ring_buffer *buffer,
2111                                      struct ring_buffer_event *event,
2112                                      unsigned long flags, int pc,
2113                                      struct pt_regs *regs)
2114 {
2115         __buffer_unlock_commit(buffer, event);
2116
2117         /*
2118          * If regs is not set, then skip the following callers:
2119          *   trace_buffer_unlock_commit_regs
2120          *   event_trigger_unlock_commit
2121          *   trace_event_buffer_commit
2122          *   trace_event_raw_event_sched_switch
2123          * Note, we can still get here via blktrace, wakeup tracer
2124          * and mmiotrace, but that's ok if they lose a function or
2125          * two. They are not that meaningful.
2126          */
2127         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128         ftrace_trace_userstack(tr, buffer, flags, pc);
2129 }
2130
2131 void
2132 trace_function(struct trace_array *tr,
2133                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134                int pc)
2135 {
2136         struct trace_event_call *call = &event_function;
2137         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138         struct ring_buffer_event *event;
2139         struct ftrace_entry *entry;
2140
2141         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142                                           flags, pc);
2143         if (!event)
2144                 return;
2145         entry   = ring_buffer_event_data(event);
2146         entry->ip                       = ip;
2147         entry->parent_ip                = parent_ip;
2148
2149         if (!call_filter_check_discard(call, entry, buffer, event))
2150                 __buffer_unlock_commit(buffer, event);
2151 }
2152
2153 #ifdef CONFIG_STACKTRACE
2154
2155 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156 struct ftrace_stack {
2157         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2158 };
2159
2160 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162
2163 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164                                  unsigned long flags,
2165                                  int skip, int pc, struct pt_regs *regs)
2166 {
2167         struct trace_event_call *call = &event_kernel_stack;
2168         struct ring_buffer_event *event;
2169         struct stack_entry *entry;
2170         struct stack_trace trace;
2171         int use_stack;
2172         int size = FTRACE_STACK_ENTRIES;
2173
2174         trace.nr_entries        = 0;
2175         trace.skip              = skip;
2176
2177         /*
2178          * Add two, for this function and the call to save_stack_trace().
2179          * If regs is set, then these functions will not be in the way.
2180          */
2181         if (!regs)
2182                 trace.skip += 2;
2183
2184         /*
2185          * Since events can happen in NMIs there's no safe way to
2186          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2187          * or NMI comes in, it will just have to use the default
2188          * FTRACE_STACK_ENTRIES-sized stack in the event itself.
2189          */
2190         preempt_disable_notrace();
2191
2192         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193         /*
2194          * We don't need any atomic variables, just a barrier.
2195          * If an interrupt comes in, we don't care, because it would
2196          * have exited and put the counter back to what we want.
2197          * We just need a barrier to keep gcc from moving things
2198          * around.
2199          */
2200         barrier();
2201         if (use_stack == 1) {
2202                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2203                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2204
2205                 if (regs)
2206                         save_stack_trace_regs(regs, &trace);
2207                 else
2208                         save_stack_trace(&trace);
2209
2210                 if (trace.nr_entries > size)
2211                         size = trace.nr_entries;
2212         } else
2213                 /* From now on, use_stack is a boolean */
2214                 use_stack = 0;
2215
2216         size *= sizeof(unsigned long);
2217
2218         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2220                                     flags, pc);
2221         if (!event)
2222                 goto out;
2223         entry = ring_buffer_event_data(event);
2224
2225         memset(&entry->caller, 0, size);
2226
2227         if (use_stack)
2228                 memcpy(&entry->caller, trace.entries,
2229                        trace.nr_entries * sizeof(unsigned long));
2230         else {
2231                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2232                 trace.entries           = entry->caller;
2233                 if (regs)
2234                         save_stack_trace_regs(regs, &trace);
2235                 else
2236                         save_stack_trace(&trace);
2237         }
2238
2239         entry->size = trace.nr_entries;
2240
2241         if (!call_filter_check_discard(call, entry, buffer, event))
2242                 __buffer_unlock_commit(buffer, event);
2243
2244  out:
2245         /* Again, don't let gcc optimize things here */
2246         barrier();
2247         __this_cpu_dec(ftrace_stack_reserve);
2248         preempt_enable_notrace();
2249
2250 }
2251
2252 static inline void ftrace_trace_stack(struct trace_array *tr,
2253                                       struct ring_buffer *buffer,
2254                                       unsigned long flags,
2255                                       int skip, int pc, struct pt_regs *regs)
2256 {
2257         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2258                 return;
2259
2260         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2261 }
2262
2263 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2264                    int pc)
2265 {
2266         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2267 }
2268
2269 /**
2270  * trace_dump_stack - record a stack back trace in the trace buffer
2271  * @skip: Number of functions to skip (helper handlers)
2272  */
2273 void trace_dump_stack(int skip)
2274 {
2275         unsigned long flags;
2276
2277         if (tracing_disabled || tracing_selftest_running)
2278                 return;
2279
2280         local_save_flags(flags);
2281
2282         /*
2283          * Skip 3 more; this seems to get us to the caller of
2284          * this function.
2285          */
2286         skip += 3;
2287         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2288                              flags, skip, preempt_count(), NULL);
2289 }
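
/*
 * Illustrative sketch, not part of the original source: trace_dump_stack()
 * is typically dropped into code being debugged to record "how did we get
 * here?" in the trace buffer rather than on the console.
 * example_suspect_path() is a hypothetical name; the block is compiled
 * out with #if 0.
 */
#if 0
static void example_suspect_path(void)
{
        /* Record the current kernel stack; 0 means skip no extra frames. */
        trace_dump_stack(0);
}
#endif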
2290
2291 static DEFINE_PER_CPU(int, user_stack_count);
2292
2293 void
2294 ftrace_trace_userstack(struct trace_array *tr,
2295                        struct ring_buffer *buffer, unsigned long flags, int pc)
2296 {
2297         struct trace_event_call *call = &event_user_stack;
2298         struct ring_buffer_event *event;
2299         struct userstack_entry *entry;
2300         struct stack_trace trace;
2301
2302         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2303                 return;
2304
2305         /*
2306          * NMIs can not handle page faults, even with fixups.
2307          * Saving the user stack can (and often does) fault.
2308          */
2309         if (unlikely(in_nmi()))
2310                 return;
2311
2312         /*
2313          * prevent recursion, since the user stack tracing may
2314          * trigger other kernel events.
2315          */
2316         preempt_disable();
2317         if (__this_cpu_read(user_stack_count))
2318                 goto out;
2319
2320         __this_cpu_inc(user_stack_count);
2321
2322         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2323                                           sizeof(*entry), flags, pc);
2324         if (!event)
2325                 goto out_drop_count;
2326         entry   = ring_buffer_event_data(event);
2327
2328         entry->tgid             = current->tgid;
2329         memset(&entry->caller, 0, sizeof(entry->caller));
2330
2331         trace.nr_entries        = 0;
2332         trace.max_entries       = FTRACE_STACK_ENTRIES;
2333         trace.skip              = 0;
2334         trace.entries           = entry->caller;
2335
2336         save_stack_trace_user(&trace);
2337         if (!call_filter_check_discard(call, entry, buffer, event))
2338                 __buffer_unlock_commit(buffer, event);
2339
2340  out_drop_count:
2341         __this_cpu_dec(user_stack_count);
2342  out:
2343         preempt_enable();
2344 }
2345
2346 #ifdef UNUSED
2347 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2348 {
2349         ftrace_trace_userstack(tr, flags, preempt_count());
2350 }
2351 #endif /* UNUSED */
2352
2353 #endif /* CONFIG_STACKTRACE */
2354
2355 /* created for use with alloc_percpu */
2356 struct trace_buffer_struct {
2357         int nesting;
2358         char buffer[4][TRACE_BUF_SIZE];
2359 };
2360
2361 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2362
2363 /*
2364  * This allows for lockless recording.  If we're nested too deeply, then
2365  * this returns NULL.
2366  */
2367 static char *get_trace_buf(void)
2368 {
2369         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2370
2371         if (!trace_percpu_buffer || buffer->nesting >= 4)
2372                 return NULL;
2373
2374         buffer->nesting++;
2375
2376         /* Interrupts must see nesting incremented before we use the buffer */
2377         barrier();
2378         return &buffer->buffer[buffer->nesting - 1][0];
2379 }
2380
2381 static void put_trace_buf(void)
2382 {
2383         /* Don't let the decrement of nesting leak before this */
2384         barrier();
2385         this_cpu_dec(trace_percpu_buffer->nesting);
2386 }
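
/*
 * Illustrative sketch, not part of the original source: the
 * get_trace_buf()/put_trace_buf() pairing used by the vprintk paths
 * below. The caller runs with preemption disabled, treats a NULL return
 * as "nested too deeply", and always balances with put_trace_buf().
 * example_format() is a hypothetical name; the block is compiled out
 * with #if 0.
 */
#if 0
static int example_format(const char *fmt, va_list args)
{
        char *tbuffer;
        int len = 0;

        preempt_disable_notrace();

        tbuffer = get_trace_buf();
        if (!tbuffer)
                goto out;       /* more than four nested users on this CPU */

        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
        /* ... copy tbuffer into a reserved ring buffer event here ... */

        put_trace_buf();
out:
        preempt_enable_notrace();
        return len;
}
#endif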
2387
2388 static int alloc_percpu_trace_buffer(void)
2389 {
2390         struct trace_buffer_struct __percpu *buffers;
2391
2392         buffers = alloc_percpu(struct trace_buffer_struct);
2393         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2394                 return -ENOMEM;
2395
2396         trace_percpu_buffer = buffers;
2397         return 0;
2398 }
2399
2400 static int buffers_allocated;
2401
2402 void trace_printk_init_buffers(void)
2403 {
2404         if (buffers_allocated)
2405                 return;
2406
2407         if (alloc_percpu_trace_buffer())
2408                 return;
2409
2410         /* trace_printk() is for debug use only. Don't use it in production. */
2411
2412         pr_warn("\n");
2413         pr_warn("**********************************************************\n");
2414         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2415         pr_warn("**                                                      **\n");
2416         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2417         pr_warn("**                                                      **\n");
2418         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2419         pr_warn("** unsafe for production use.                           **\n");
2420         pr_warn("**                                                      **\n");
2421         pr_warn("** If you see this message and you are not debugging    **\n");
2422         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2423         pr_warn("**                                                      **\n");
2424         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2425         pr_warn("**********************************************************\n");
2426
2427         /* Expand the buffers to set size */
2428         tracing_update_buffers();
2429
2430         buffers_allocated = 1;
2431
2432         /*
2433          * trace_printk_init_buffers() can be called by modules.
2434          * If that happens, then we need to start cmdline recording
2435          * directly here. If global_trace.trace_buffer.buffer is
2436          * already allocated, then this was called by module code.
2437          */
2438         if (global_trace.trace_buffer.buffer)
2439                 tracing_start_cmdline_record();
2440 }
2441
2442 void trace_printk_start_comm(void)
2443 {
2444         /* Start tracing comms if trace printk is set */
2445         if (!buffers_allocated)
2446                 return;
2447         tracing_start_cmdline_record();
2448 }
2449
2450 static void trace_printk_start_stop_comm(int enabled)
2451 {
2452         if (!buffers_allocated)
2453                 return;
2454
2455         if (enabled)
2456                 tracing_start_cmdline_record();
2457         else
2458                 tracing_stop_cmdline_record();
2459 }
2460
2461 /**
2462  * trace_vbprintk - write binary msg to tracing buffer
 * @ip:   The address of the caller
 * @fmt:  The format to write to the trace buffer
 * @args: Arguments for @fmt
2464  */
2465 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2466 {
2467         struct trace_event_call *call = &event_bprint;
2468         struct ring_buffer_event *event;
2469         struct ring_buffer *buffer;
2470         struct trace_array *tr = &global_trace;
2471         struct bprint_entry *entry;
2472         unsigned long flags;
2473         char *tbuffer;
2474         int len = 0, size, pc;
2475
2476         if (unlikely(tracing_selftest_running || tracing_disabled))
2477                 return 0;
2478
2479         /* Don't pollute graph traces with trace_vprintk internals */
2480         pause_graph_tracing();
2481
2482         pc = preempt_count();
2483         preempt_disable_notrace();
2484
2485         tbuffer = get_trace_buf();
2486         if (!tbuffer) {
2487                 len = 0;
2488                 goto out_nobuffer;
2489         }
2490
2491         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2492
2493         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2494                 goto out;
2495
2496         local_save_flags(flags);
2497         size = sizeof(*entry) + sizeof(u32) * len;
2498         buffer = tr->trace_buffer.buffer;
2499         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2500                                           flags, pc);
2501         if (!event)
2502                 goto out;
2503         entry = ring_buffer_event_data(event);
2504         entry->ip                       = ip;
2505         entry->fmt                      = fmt;
2506
2507         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2508         if (!call_filter_check_discard(call, entry, buffer, event)) {
2509                 __buffer_unlock_commit(buffer, event);
2510                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2511         }
2512
2513 out:
2514         put_trace_buf();
2515
2516 out_nobuffer:
2517         preempt_enable_notrace();
2518         unpause_graph_tracing();
2519
2520         return len;
2521 }
2522 EXPORT_SYMBOL_GPL(trace_vbprintk);
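
/*
 * Illustrative sketch, not part of the original source: a varargs wrapper
 * around trace_vbprintk(), roughly the shape of the __trace_bprintk()
 * plumbing that trace_printk() expands to. example_bprintk() is a
 * hypothetical name; the block is compiled out with #if 0.
 */
#if 0
static __printf(2, 3) int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}
#endif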
2523
2524 __printf(3, 0)
2525 static int
2526 __trace_array_vprintk(struct ring_buffer *buffer,
2527                       unsigned long ip, const char *fmt, va_list args)
2528 {
2529         struct trace_event_call *call = &event_print;
2530         struct ring_buffer_event *event;
2531         int len = 0, size, pc;
2532         struct print_entry *entry;
2533         unsigned long flags;
2534         char *tbuffer;
2535
2536         if (tracing_disabled || tracing_selftest_running)
2537                 return 0;
2538
2539         /* Don't pollute graph traces with trace_vprintk internals */
2540         pause_graph_tracing();
2541
2542         pc = preempt_count();
2543         preempt_disable_notrace();
2544
2546         tbuffer = get_trace_buf();
2547         if (!tbuffer) {
2548                 len = 0;
2549                 goto out_nobuffer;
2550         }
2551
2552         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2553
2554         local_save_flags(flags);
2555         size = sizeof(*entry) + len + 1;
2556         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2557                                           flags, pc);
2558         if (!event)
2559                 goto out;
2560         entry = ring_buffer_event_data(event);
2561         entry->ip = ip;
2562
2563         memcpy(&entry->buf, tbuffer, len + 1);
2564         if (!call_filter_check_discard(call, entry, buffer, event)) {
2565                 __buffer_unlock_commit(buffer, event);
2566                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2567         }
2568
2569 out:
2570         put_trace_buf();
2571
2572 out_nobuffer:
2573         preempt_enable_notrace();
2574         unpause_graph_tracing();
2575
2576         return len;
2577 }
2578
2579 __printf(3, 0)
2580 int trace_array_vprintk(struct trace_array *tr,
2581                         unsigned long ip, const char *fmt, va_list args)
2582 {
2583         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2584 }
2585
2586 __printf(3, 0)
2587 int trace_array_printk(struct trace_array *tr,
2588                        unsigned long ip, const char *fmt, ...)
2589 {
2590         int ret;
2591         va_list ap;
2592
2593         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2594                 return 0;
2595
2596         if (!tr)
2597                 return -ENOENT;
2598
2599         va_start(ap, fmt);
2600         ret = trace_array_vprintk(tr, ip, fmt, ap);
2601         va_end(ap);
2602         return ret;
2603 }
2604
2605 __printf(3, 4)
2606 int trace_array_printk_buf(struct ring_buffer *buffer,
2607                            unsigned long ip, const char *fmt, ...)
2608 {
2609         int ret;
2610         va_list ap;
2611
2612         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2613                 return 0;
2614
2615         va_start(ap, fmt);
2616         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2617         va_end(ap);
2618         return ret;
2619 }
2620
2621 __printf(2, 0)
2622 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2623 {
2624         return trace_array_vprintk(&global_trace, ip, fmt, args);
2625 }
2626 EXPORT_SYMBOL_GPL(trace_vprintk);
2627
2628 static void trace_iterator_increment(struct trace_iterator *iter)
2629 {
2630         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2631
2632         iter->idx++;
2633         if (buf_iter)
2634                 ring_buffer_read(buf_iter, NULL);
2635 }
2636
2637 static struct trace_entry *
2638 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2639                 unsigned long *lost_events)
2640 {
2641         struct ring_buffer_event *event;
2642         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2643
2644         if (buf_iter)
2645                 event = ring_buffer_iter_peek(buf_iter, ts);
2646         else
2647                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2648                                          lost_events);
2649
2650         if (event) {
2651                 iter->ent_size = ring_buffer_event_length(event);
2652                 return ring_buffer_event_data(event);
2653         }
2654         iter->ent_size = 0;
2655         return NULL;
2656 }
2657
2658 static struct trace_entry *
2659 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2660                   unsigned long *missing_events, u64 *ent_ts)
2661 {
2662         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2663         struct trace_entry *ent, *next = NULL;
2664         unsigned long lost_events = 0, next_lost = 0;
2665         int cpu_file = iter->cpu_file;
2666         u64 next_ts = 0, ts;
2667         int next_cpu = -1;
2668         int next_size = 0;
2669         int cpu;
2670
2671         /*
2672          * If we are in a per_cpu trace file, don't bother iterating over
2673          * all the CPUs; peek directly at that one.
2674          */
2675         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2676                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2677                         return NULL;
2678                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2679                 if (ent_cpu)
2680                         *ent_cpu = cpu_file;
2681
2682                 return ent;
2683         }
2684
2685         for_each_tracing_cpu(cpu) {
2686
2687                 if (ring_buffer_empty_cpu(buffer, cpu))
2688                         continue;
2689
2690                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2691
2692                 /*
2693                  * Pick the entry with the smallest timestamp:
2694                  */
2695                 if (ent && (!next || ts < next_ts)) {
2696                         next = ent;
2697                         next_cpu = cpu;
2698                         next_ts = ts;
2699                         next_lost = lost_events;
2700                         next_size = iter->ent_size;
2701                 }
2702         }
2703
2704         iter->ent_size = next_size;
2705
2706         if (ent_cpu)
2707                 *ent_cpu = next_cpu;
2708
2709         if (ent_ts)
2710                 *ent_ts = next_ts;
2711
2712         if (missing_events)
2713                 *missing_events = next_lost;
2714
2715         return next;
2716 }
2717
2718 /* Find the next real entry, without updating the iterator itself */
2719 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2720                                           int *ent_cpu, u64 *ent_ts)
2721 {
2722         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2723 }
2724
2725 /* Find the next real entry, and increment the iterator to the next entry */
2726 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2727 {
2728         iter->ent = __find_next_entry(iter, &iter->cpu,
2729                                       &iter->lost_events, &iter->ts);
2730
2731         if (iter->ent)
2732                 trace_iterator_increment(iter);
2733
2734         return iter->ent ? iter : NULL;
2735 }
2736
2737 static void trace_consume(struct trace_iterator *iter)
2738 {
2739         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2740                             &iter->lost_events);
2741 }
2742
2743 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2744 {
2745         struct trace_iterator *iter = m->private;
2746         int i = (int)*pos;
2747         void *ent;
2748
2749         WARN_ON_ONCE(iter->leftover);
2750
2751         (*pos)++;
2752
2753         /* can't go backwards */
2754         if (iter->idx > i)
2755                 return NULL;
2756
2757         if (iter->idx < 0)
2758                 ent = trace_find_next_entry_inc(iter);
2759         else
2760                 ent = iter;
2761
2762         while (ent && iter->idx < i)
2763                 ent = trace_find_next_entry_inc(iter);
2764
2765         iter->pos = *pos;
2766
2767         return ent;
2768 }
2769
2770 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2771 {
2772         struct ring_buffer_event *event;
2773         struct ring_buffer_iter *buf_iter;
2774         unsigned long entries = 0;
2775         u64 ts;
2776
2777         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2778
2779         buf_iter = trace_buffer_iter(iter, cpu);
2780         if (!buf_iter)
2781                 return;
2782
2783         ring_buffer_iter_reset(buf_iter);
2784
2785         /*
2786          * With the max latency tracers, we could have the case
2787          * that a reset never took place on a cpu. This is evidenced
2788          * by the timestamp being before the start of the buffer.
2789          */
2790         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2791                 if (ts >= iter->trace_buffer->time_start)
2792                         break;
2793                 entries++;
2794                 ring_buffer_read(buf_iter, NULL);
2795         }
2796
2797         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2798 }
2799
2800 /*
2801  * The current tracer is copied to avoid taking a global lock
2802  * all around.
2803  */
2804 static void *s_start(struct seq_file *m, loff_t *pos)
2805 {
2806         struct trace_iterator *iter = m->private;
2807         struct trace_array *tr = iter->tr;
2808         int cpu_file = iter->cpu_file;
2809         void *p = NULL;
2810         loff_t l = 0;
2811         int cpu;
2812
2813         /*
2814          * copy the tracer to avoid using a global lock all around.
2815          * iter->trace is a copy of current_trace, the pointer to the
2816          * name may be used instead of a strcmp(), as iter->trace->name
2817          * will point to the same string as current_trace->name.
2818          */
2819         mutex_lock(&trace_types_lock);
2820         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2821                 *iter->trace = *tr->current_trace;
2822         mutex_unlock(&trace_types_lock);
2823
2824 #ifdef CONFIG_TRACER_MAX_TRACE
2825         if (iter->snapshot && iter->trace->use_max_tr)
2826                 return ERR_PTR(-EBUSY);
2827 #endif
2828
2829         if (*pos != iter->pos) {
2830                 iter->ent = NULL;
2831                 iter->cpu = 0;
2832                 iter->idx = -1;
2833
2834                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2835                         for_each_tracing_cpu(cpu)
2836                                 tracing_iter_reset(iter, cpu);
2837                 } else
2838                         tracing_iter_reset(iter, cpu_file);
2839
2840                 iter->leftover = 0;
2841                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2842                         ;
2843
2844         } else {
2845                 /*
2846                  * If we overflowed the seq_file before, then we want
2847                  * to just reuse the trace_seq buffer again.
2848                  */
2849                 if (iter->leftover)
2850                         p = iter;
2851                 else {
2852                         l = *pos - 1;
2853                         p = s_next(m, p, &l);
2854                 }
2855         }
2856
2857         trace_event_read_lock();
2858         trace_access_lock(cpu_file);
2859         return p;
2860 }
2861
2862 static void s_stop(struct seq_file *m, void *p)
2863 {
2864         struct trace_iterator *iter = m->private;
2865
2866 #ifdef CONFIG_TRACER_MAX_TRACE
2867         if (iter->snapshot && iter->trace->use_max_tr)
2868                 return;
2869 #endif
2870
2871         trace_access_unlock(iter->cpu_file);
2872         trace_event_read_unlock();
2873 }
2874
2875 static void
2876 get_total_entries(struct trace_buffer *buf,
2877                   unsigned long *total, unsigned long *entries)
2878 {
2879         unsigned long count;
2880         int cpu;
2881
2882         *total = 0;
2883         *entries = 0;
2884
2885         for_each_tracing_cpu(cpu) {
2886                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2887                 /*
2888                  * If this buffer has skipped entries, then we hold all
2889                  * entries for the trace and we need to ignore the
2890                  * ones before the time stamp.
2891                  */
2892                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2893                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2894                         /* total is the same as the entries */
2895                         *total += count;
2896                 } else
2897                         *total += count +
2898                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2899                 *entries += count;
2900         }
2901 }
2902
2903 static void print_lat_help_header(struct seq_file *m)
2904 {
2905         seq_puts(m, "#                  _------=> CPU#            \n"
2906                     "#                 / _-----=> irqs-off        \n"
2907                     "#                | / _----=> need-resched    \n"
2908                     "#                || / _---=> hardirq/softirq \n"
2909                     "#                ||| / _--=> preempt-depth   \n"
2910                     "#                |||| /     delay            \n"
2911                     "#  cmd     pid   ||||| time  |   caller      \n"
2912                     "#     \\   /      |||||  \\    |   /         \n");
2913 }
2914
2915 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2916 {
2917         unsigned long total;
2918         unsigned long entries;
2919
2920         get_total_entries(buf, &total, &entries);
2921         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2922                    entries, total, num_online_cpus());
2923         seq_puts(m, "#\n");
2924 }
2925
2926 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2927 {
2928         print_event_info(buf, m);
2929         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2930                     "#              | |       |          |         |\n");
2931 }
2932
2933 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2934 {
2935         print_event_info(buf, m);
2936         seq_puts(m, "#                              _-----=> irqs-off\n"
2937                     "#                             / _----=> need-resched\n"
2938                     "#                            | / _---=> hardirq/softirq\n"
2939                     "#                            || / _--=> preempt-depth\n"
2940                     "#                            ||| /     delay\n"
2941                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2942                     "#              | |       |   ||||       |         |\n");
2943 }
2944
2945 void
2946 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2947 {
2948         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2949         struct trace_buffer *buf = iter->trace_buffer;
2950         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2951         struct tracer *type = iter->trace;
2952         unsigned long entries;
2953         unsigned long total;
2954         const char *name = type->name;
2957
2958         get_total_entries(buf, &total, &entries);
2959
2960         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2961                    name, UTS_RELEASE);
2962         seq_puts(m, "# -----------------------------------"
2963                  "---------------------------------\n");
2964         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2965                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2966                    nsecs_to_usecs(data->saved_latency),
2967                    entries,
2968                    total,
2969                    buf->cpu,
2970 #if defined(CONFIG_PREEMPT_NONE)
2971                    "server",
2972 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2973                    "desktop",
2974 #elif defined(CONFIG_PREEMPT)
2975                    "preempt",
2976 #else
2977                    "unknown",
2978 #endif
2979                    /* These are reserved for later use */
2980                    0, 0, 0, 0);
2981 #ifdef CONFIG_SMP
2982         seq_printf(m, " #P:%d)\n", num_online_cpus());
2983 #else
2984         seq_puts(m, ")\n");
2985 #endif
2986         seq_puts(m, "#    -----------------\n");
2987         seq_printf(m, "#    | task: %.16s-%d "
2988                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2989                    data->comm, data->pid,
2990                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2991                    data->policy, data->rt_priority);
2992         seq_puts(m, "#    -----------------\n");
2993
2994         if (data->critical_start) {
2995                 seq_puts(m, "#  => started at: ");
2996                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2997                 trace_print_seq(m, &iter->seq);
2998                 seq_puts(m, "\n#  => ended at:   ");
2999                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3000                 trace_print_seq(m, &iter->seq);
3001                 seq_puts(m, "\n#\n");
3002         }
3003
3004         seq_puts(m, "#\n");
3005 }
3006
3007 static void test_cpu_buff_start(struct trace_iterator *iter)
3008 {
3009         struct trace_seq *s = &iter->seq;
3010         struct trace_array *tr = iter->tr;
3011
3012         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3013                 return;
3014
3015         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3016                 return;
3017
3018         if (cpumask_available(iter->started) &&
3019             cpumask_test_cpu(iter->cpu, iter->started))
3020                 return;
3021
3022         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3023                 return;
3024
3025         if (cpumask_available(iter->started))
3026                 cpumask_set_cpu(iter->cpu, iter->started);
3027
3028         /* Don't print started cpu buffer for the first entry of the trace */
3029         if (iter->idx > 1)
3030                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3031                                 iter->cpu);
3032 }
3033
3034 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3035 {
3036         struct trace_array *tr = iter->tr;
3037         struct trace_seq *s = &iter->seq;
3038         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3039         struct trace_entry *entry;
3040         struct trace_event *event;
3041
3042         entry = iter->ent;
3043
3044         test_cpu_buff_start(iter);
3045
3046         event = ftrace_find_event(entry->type);
3047
3048         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3049                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3050                         trace_print_lat_context(iter);
3051                 else
3052                         trace_print_context(iter);
3053         }
3054
3055         if (trace_seq_has_overflowed(s))
3056                 return TRACE_TYPE_PARTIAL_LINE;
3057
3058         if (event)
3059                 return event->funcs->trace(iter, sym_flags, event);
3060
3061         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3062
3063         return trace_handle_return(s);
3064 }
3065
3066 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3067 {
3068         struct trace_array *tr = iter->tr;
3069         struct trace_seq *s = &iter->seq;
3070         struct trace_entry *entry;
3071         struct trace_event *event;
3072
3073         entry = iter->ent;
3074
3075         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3076                 trace_seq_printf(s, "%d %d %llu ",
3077                                  entry->pid, iter->cpu, iter->ts);
3078
3079         if (trace_seq_has_overflowed(s))
3080                 return TRACE_TYPE_PARTIAL_LINE;
3081
3082         event = ftrace_find_event(entry->type);
3083         if (event)
3084                 return event->funcs->raw(iter, 0, event);
3085
3086         trace_seq_printf(s, "%d ?\n", entry->type);
3087
3088         return trace_handle_return(s);
3089 }
3090
3091 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3092 {
3093         struct trace_array *tr = iter->tr;
3094         struct trace_seq *s = &iter->seq;
3095         unsigned char newline = '\n';
3096         struct trace_entry *entry;
3097         struct trace_event *event;
3098
3099         entry = iter->ent;
3100
3101         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3102                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3103                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3104                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3105                 if (trace_seq_has_overflowed(s))
3106                         return TRACE_TYPE_PARTIAL_LINE;
3107         }
3108
3109         event = ftrace_find_event(entry->type);
3110         if (event) {
3111                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3112                 if (ret != TRACE_TYPE_HANDLED)
3113                         return ret;
3114         }
3115
3116         SEQ_PUT_FIELD(s, newline);
3117
3118         return trace_handle_return(s);
3119 }
3120
3121 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3122 {
3123         struct trace_array *tr = iter->tr;
3124         struct trace_seq *s = &iter->seq;
3125         struct trace_entry *entry;
3126         struct trace_event *event;
3127
3128         entry = iter->ent;
3129
3130         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3131                 SEQ_PUT_FIELD(s, entry->pid);
3132                 SEQ_PUT_FIELD(s, iter->cpu);
3133                 SEQ_PUT_FIELD(s, iter->ts);
3134                 if (trace_seq_has_overflowed(s))
3135                         return TRACE_TYPE_PARTIAL_LINE;
3136         }
3137
3138         event = ftrace_find_event(entry->type);
3139         return event ? event->funcs->binary(iter, 0, event) :
3140                 TRACE_TYPE_HANDLED;
3141 }
3142
3143 int trace_empty(struct trace_iterator *iter)
3144 {
3145         struct ring_buffer_iter *buf_iter;
3146         int cpu;
3147
3148         /* If we are looking at one CPU buffer, only check that one */
3149         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3150                 cpu = iter->cpu_file;
3151                 buf_iter = trace_buffer_iter(iter, cpu);
3152                 if (buf_iter) {
3153                         if (!ring_buffer_iter_empty(buf_iter))
3154                                 return 0;
3155                 } else {
3156                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3157                                 return 0;
3158                 }
3159                 return 1;
3160         }
3161
3162         for_each_tracing_cpu(cpu) {
3163                 buf_iter = trace_buffer_iter(iter, cpu);
3164                 if (buf_iter) {
3165                         if (!ring_buffer_iter_empty(buf_iter))
3166                                 return 0;
3167                 } else {
3168                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3169                                 return 0;
3170                 }
3171         }
3172
3173         return 1;
3174 }
3175
3176 /*  Called with trace_event_read_lock() held. */
3177 enum print_line_t print_trace_line(struct trace_iterator *iter)
3178 {
3179         struct trace_array *tr = iter->tr;
3180         unsigned long trace_flags = tr->trace_flags;
3181         enum print_line_t ret;
3182
3183         if (iter->lost_events) {
3184                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3185                                  iter->cpu, iter->lost_events);
3186                 if (trace_seq_has_overflowed(&iter->seq))
3187                         return TRACE_TYPE_PARTIAL_LINE;
3188         }
3189
3190         if (iter->trace && iter->trace->print_line) {
3191                 ret = iter->trace->print_line(iter);
3192                 if (ret != TRACE_TYPE_UNHANDLED)
3193                         return ret;
3194         }
3195
3196         if (iter->ent->type == TRACE_BPUTS &&
3197                         trace_flags & TRACE_ITER_PRINTK &&
3198                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3199                 return trace_print_bputs_msg_only(iter);
3200
3201         if (iter->ent->type == TRACE_BPRINT &&
3202                         trace_flags & TRACE_ITER_PRINTK &&
3203                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3204                 return trace_print_bprintk_msg_only(iter);
3205
3206         if (iter->ent->type == TRACE_PRINT &&
3207                         trace_flags & TRACE_ITER_PRINTK &&
3208                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3209                 return trace_print_printk_msg_only(iter);
3210
3211         if (trace_flags & TRACE_ITER_BIN)
3212                 return print_bin_fmt(iter);
3213
3214         if (trace_flags & TRACE_ITER_HEX)
3215                 return print_hex_fmt(iter);
3216
3217         if (trace_flags & TRACE_ITER_RAW)
3218                 return print_raw_fmt(iter);
3219
3220         return print_trace_fmt(iter);
3221 }
3222
3223 void trace_latency_header(struct seq_file *m)
3224 {
3225         struct trace_iterator *iter = m->private;
3226         struct trace_array *tr = iter->tr;
3227
3228         /* print nothing if the buffers are empty */
3229         if (trace_empty(iter))
3230                 return;
3231
3232         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3233                 print_trace_header(m, iter);
3234
3235         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3236                 print_lat_help_header(m);
3237 }
3238
3239 void trace_default_header(struct seq_file *m)
3240 {
3241         struct trace_iterator *iter = m->private;
3242         struct trace_array *tr = iter->tr;
3243         unsigned long trace_flags = tr->trace_flags;
3244
3245         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3246                 return;
3247
3248         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3249                 /* print nothing if the buffers are empty */
3250                 if (trace_empty(iter))
3251                         return;
3252                 print_trace_header(m, iter);
3253                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3254                         print_lat_help_header(m);
3255         } else {
3256                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3257                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3258                                 print_func_help_header_irq(iter->trace_buffer, m);
3259                         else
3260                                 print_func_help_header(iter->trace_buffer, m);
3261                 }
3262         }
3263 }
3264
3265 static void test_ftrace_alive(struct seq_file *m)
3266 {
3267         if (!ftrace_is_dead())
3268                 return;
3269         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3270                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3271 }
3272
3273 #ifdef CONFIG_TRACER_MAX_TRACE
3274 static void show_snapshot_main_help(struct seq_file *m)
3275 {
3276         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3277                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278                     "#                      Takes a snapshot of the main buffer.\n"
3279                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3280                     "#                      (Doesn't have to be '2'; works with any number that\n"
3281                     "#                       is not a '0' or '1')\n");
3282 }
3283
3284 static void show_snapshot_percpu_help(struct seq_file *m)
3285 {
3286         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3287 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3288         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3289                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3290 #else
3291         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3292                     "#                     Must use main snapshot file to allocate.\n");
3293 #endif
3294         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3295                     "#                      (Doesn't have to be '2'; works with any number that\n"
3296                     "#                       is not a '0' or '1')\n");
3297 }
3298
3299 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3300 {
3301         if (iter->tr->allocated_snapshot)
3302                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3303         else
3304                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3305
3306         seq_puts(m, "# Snapshot commands:\n");
3307         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3308                 show_snapshot_main_help(m);
3309         else
3310                 show_snapshot_percpu_help(m);
3311 }
3312 #else
3313 /* Should never be called */
3314 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3315 #endif
3316
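     /*
      * seq_file ->show() for the "trace" file. Three cases:
      *   - no current entry: emit the headers (tracer name, the
      *     ftrace-alive warning, and either the snapshot help, the
      *     tracer's own header, or the default one)
      *   - leftover set: the seq_file buffer overflowed last time, so
      *     just flush what was already formatted
      *   - otherwise: format one trace line and remember any overflow
      *     in iter->leftover so it is retried on the next call
      */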
3317 static int s_show(struct seq_file *m, void *v)
3318 {
3319         struct trace_iterator *iter = v;
3320         int ret;
3321
3322         if (iter->ent == NULL) {
3323                 if (iter->tr) {
3324                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3325                         seq_puts(m, "#\n");
3326                         test_ftrace_alive(m);
3327                 }
3328                 if (iter->snapshot && trace_empty(iter))
3329                         print_snapshot_help(m, iter);
3330                 else if (iter->trace && iter->trace->print_header)
3331                         iter->trace->print_header(m);
3332                 else
3333                         trace_default_header(m);
3334
3335         } else if (iter->leftover) {
3336                 /*
3337                  * If we filled the seq_file buffer earlier, we
3338                  * want to just show it now.
3339                  */
3340                 ret = trace_print_seq(m, &iter->seq);
3341
3342                 /* ret should this time be zero, but you never know */
3343                 iter->leftover = ret;
3344
3345         } else {
3346                 print_trace_line(iter);
3347                 ret = trace_print_seq(m, &iter->seq);
3348                 /*
3349                  * If we overflow the seq_file buffer, then it will
3350                  * ask us for this data again at start up.
3351                  * Use that instead.
3352                  *  ret is 0 if seq_file write succeeded.
3353                  *        -1 otherwise.
3354                  */
3355                 iter->leftover = ret;
3356         }
3357
3358         return 0;
3359 }
3360
3361 /*
3362  * Should be used after trace_array_get(), trace_types_lock
3363  * ensures that i_cdev was already initialized.
3364  */
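     /*
      * Per-CPU trace files store cpu + 1 in i_cdev (see
      * trace_create_cpu_file()), so a NULL i_cdev here means the file
      * covers all CPUs.
      */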
3365 static inline int tracing_get_cpu(struct inode *inode)
3366 {
3367         if (inode->i_cdev) /* See trace_create_cpu_file() */
3368                 return (long)inode->i_cdev - 1;
3369         return RING_BUFFER_ALL_CPUS;
3370 }
3371
3372 static const struct seq_operations tracer_seq_ops = {
3373         .start          = s_start,
3374         .next           = s_next,
3375         .stop           = s_stop,
3376         .show           = s_show,
3377 };
3378
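     /*
      * Set up an iterator for reading the "trace" (or "snapshot") file:
      * allocate the seq_file private iterator plus one ring buffer
      * iterator per possible CPU, take a private copy of the current
      * tracer, pick the max (snapshot) buffer when appropriate, stop
      * tracing while dumping unless this is the snapshot file, and
      * prepare/start the ring buffer read iterators for the requested
      * CPU(s).
      */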
3379 static struct trace_iterator *
3380 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3381 {
3382         struct trace_array *tr = inode->i_private;
3383         struct trace_iterator *iter;
3384         int cpu;
3385
3386         if (tracing_disabled)
3387                 return ERR_PTR(-ENODEV);
3388
3389         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3390         if (!iter)
3391                 return ERR_PTR(-ENOMEM);
3392
3393         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3394                                     GFP_KERNEL);
3395         if (!iter->buffer_iter)
3396                 goto release;
3397
3398         /*
3399          * We make a copy of the current tracer to avoid concurrent
3400          * changes on it while we are reading.
3401          */
3402         mutex_lock(&trace_types_lock);
3403         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3404         if (!iter->trace)
3405                 goto fail;
3406
3407         *iter->trace = *tr->current_trace;
3408
3409         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3410                 goto fail;
3411
3412         iter->tr = tr;
3413
3414 #ifdef CONFIG_TRACER_MAX_TRACE
3415         /* Currently only the top directory has a snapshot */
3416         if (tr->current_trace->print_max || snapshot)
3417                 iter->trace_buffer = &tr->max_buffer;
3418         else
3419 #endif
3420                 iter->trace_buffer = &tr->trace_buffer;
3421         iter->snapshot = snapshot;
3422         iter->pos = -1;
3423         iter->cpu_file = tracing_get_cpu(inode);
3424         mutex_init(&iter->mutex);
3425
3426         /* Notify the tracer early; before we stop tracing. */
3427         if (iter->trace && iter->trace->open)
3428                 iter->trace->open(iter);
3429
3430         /* Annotate start of buffers if we had overruns */
3431         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3432                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3433
3434         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3435         if (trace_clocks[tr->clock_id].in_ns)
3436                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3437
3438         /* stop the trace while dumping if we are not opening "snapshot" */
3439         if (!iter->snapshot)
3440                 tracing_stop_tr(tr);
3441
3442         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3443                 for_each_tracing_cpu(cpu) {
3444                         iter->buffer_iter[cpu] =
3445                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3446                                                          cpu, GFP_KERNEL);
3447                 }
3448                 ring_buffer_read_prepare_sync();
3449                 for_each_tracing_cpu(cpu) {
3450                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3451                         tracing_iter_reset(iter, cpu);
3452                 }
3453         } else {
3454                 cpu = iter->cpu_file;
3455                 iter->buffer_iter[cpu] =
3456                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3457                                                  cpu, GFP_KERNEL);
3458                 ring_buffer_read_prepare_sync();
3459                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3460                 tracing_iter_reset(iter, cpu);
3461         }
3462
3463         mutex_unlock(&trace_types_lock);
3464
3465         return iter;
3466
3467  fail:
3468         mutex_unlock(&trace_types_lock);
3469         kfree(iter->trace);
3470         kfree(iter->buffer_iter);
3471 release:
3472         seq_release_private(inode, file);
3473         return ERR_PTR(-ENOMEM);
3474 }
3475
3476 int tracing_open_generic(struct inode *inode, struct file *filp)
3477 {
3478         if (tracing_disabled)
3479                 return -ENODEV;
3480
3481         filp->private_data = inode->i_private;
3482         return 0;
3483 }
3484
3485 bool tracing_is_disabled(void)
3486 {
3487         return tracing_disabled ? true : false;
3488 }
3489
3490 /*
3491  * Open and update trace_array ref count.
3492  * Must have the current trace_array passed to it.
3493  */
3494 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3495 {
3496         struct trace_array *tr = inode->i_private;
3497
3498         if (tracing_disabled)
3499                 return -ENODEV;
3500
3501         if (trace_array_get(tr) < 0)
3502                 return -ENODEV;
3503
3504         filp->private_data = inode->i_private;
3505
3506         return 0;
3507 }
3508
3509 static int tracing_release(struct inode *inode, struct file *file)
3510 {
3511         struct trace_array *tr = inode->i_private;
3512         struct seq_file *m = file->private_data;
3513         struct trace_iterator *iter;
3514         int cpu;
3515
3516         if (!(file->f_mode & FMODE_READ)) {
3517                 trace_array_put(tr);
3518                 return 0;
3519         }
3520
3521         /* Writes do not use seq_file */
3522         iter = m->private;
3523         mutex_lock(&trace_types_lock);
3524
3525         for_each_tracing_cpu(cpu) {
3526                 if (iter->buffer_iter[cpu])
3527                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3528         }
3529
3530         if (iter->trace && iter->trace->close)
3531                 iter->trace->close(iter);
3532
3533         if (!iter->snapshot)
3534                 /* reenable tracing if it was previously enabled */
3535                 tracing_start_tr(tr);
3536
3537         __trace_array_put(tr);
3538
3539         mutex_unlock(&trace_types_lock);
3540
3541         mutex_destroy(&iter->mutex);
3542         free_cpumask_var(iter->started);
3543         kfree(iter->trace);
3544         kfree(iter->buffer_iter);
3545         seq_release_private(inode, file);
3546
3547         return 0;
3548 }
3549
3550 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3551 {
3552         struct trace_array *tr = inode->i_private;
3553
3554         trace_array_put(tr);
3555         return 0;
3556 }
3557
3558 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3559 {
3560         struct trace_array *tr = inode->i_private;
3561
3562         trace_array_put(tr);
3563
3564         return single_release(inode, file);
3565 }
3566
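     /*
      * Opening "trace" with O_TRUNC for writing erases the selected
      * per-CPU buffer (or all of them); opening it for reading builds
      * the seq_file iterator via __tracing_open() and applies the
      * latency format if that trace option is set.
      */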
3567 static int tracing_open(struct inode *inode, struct file *file)
3568 {
3569         struct trace_array *tr = inode->i_private;
3570         struct trace_iterator *iter;
3571         int ret = 0;
3572
3573         if (trace_array_get(tr) < 0)
3574                 return -ENODEV;
3575
3576         /* If this file was open for write, then erase contents */
3577         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3578                 int cpu = tracing_get_cpu(inode);
3579                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3580
3581 #ifdef CONFIG_TRACER_MAX_TRACE
3582                 if (tr->current_trace->print_max)
3583                         trace_buf = &tr->max_buffer;
3584 #endif
3585
3586                 if (cpu == RING_BUFFER_ALL_CPUS)
3587                         tracing_reset_online_cpus(trace_buf);
3588                 else
3589                         tracing_reset(trace_buf, cpu);
3590         }
3591
3592         if (file->f_mode & FMODE_READ) {
3593                 iter = __tracing_open(inode, file, false);
3594                 if (IS_ERR(iter))
3595                         ret = PTR_ERR(iter);
3596                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3597                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3598         }
3599
3600         if (ret < 0)
3601                 trace_array_put(tr);
3602
3603         return ret;
3604 }
3605
3606 /*
3607  * Some tracers are not suitable for instance buffers.
3608  * A tracer is always available for the global array (toplevel)
3609  * or if it explicitly states that it is.
3610  */
3611 static bool
3612 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3613 {
3614         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3615 }
3616
3617 /* Find the next tracer that this trace array may use */
3618 static struct tracer *
3619 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3620 {
3621         while (t && !trace_ok_for_array(t, tr))
3622                 t = t->next;
3623
3624         return t;
3625 }
3626
3627 static void *
3628 t_next(struct seq_file *m, void *v, loff_t *pos)
3629 {
3630         struct trace_array *tr = m->private;
3631         struct tracer *t = v;
3632
3633         (*pos)++;
3634
3635         if (t)
3636                 t = get_tracer_for_array(tr, t->next);
3637
3638         return t;
3639 }
3640
3641 static void *t_start(struct seq_file *m, loff_t *pos)
3642 {
3643         struct trace_array *tr = m->private;
3644         struct tracer *t;
3645         loff_t l = 0;
3646
3647         mutex_lock(&trace_types_lock);
3648
3649         t = get_tracer_for_array(tr, trace_types);
3650         for (; t && l < *pos; t = t_next(m, t, &l))
3651                 ;
3652
3653         return t;
3654 }
3655
3656 static void t_stop(struct seq_file *m, void *p)
3657 {
3658         mutex_unlock(&trace_types_lock);
3659 }
3660
3661 static int t_show(struct seq_file *m, void *v)
3662 {
3663         struct tracer *t = v;
3664
3665         if (!t)
3666                 return 0;
3667
3668         seq_puts(m, t->name);
3669         if (t->next)
3670                 seq_putc(m, ' ');
3671         else
3672                 seq_putc(m, '\n');
3673
3674         return 0;
3675 }
3676
3677 static const struct seq_operations show_traces_seq_ops = {
3678         .start          = t_start,
3679         .next           = t_next,
3680         .stop           = t_stop,
3681         .show           = t_show,
3682 };
3683
3684 static int show_traces_open(struct inode *inode, struct file *file)
3685 {
3686         struct trace_array *tr = inode->i_private;
3687         struct seq_file *m;
3688         int ret;
3689
3690         if (tracing_disabled)
3691                 return -ENODEV;
3692
3693         if (trace_array_get(tr) < 0)
3694                 return -ENODEV;
3695
3696         ret = seq_open(file, &show_traces_seq_ops);
3697         if (ret) {
3698                 trace_array_put(tr);
3699                 return ret;
3700         }
3701
3702         m = file->private_data;
3703         m->private = tr;
3704
3705         return 0;
3706 }
3707
3708 static int show_traces_release(struct inode *inode, struct file *file)
3709 {
3710         struct trace_array *tr = inode->i_private;
3711
3712         trace_array_put(tr);
3713         return seq_release(inode, file);
3714 }
3715
3716 static ssize_t
3717 tracing_write_stub(struct file *filp, const char __user *ubuf,
3718                    size_t count, loff_t *ppos)
3719 {
3720         return count;
3721 }
3722
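     /*
      * Seeks on a read-mode open go through seq_lseek(); write-only
      * opens never use seq_file (writes are a stub), so the file
      * position is simply reset to zero.
      */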
3723 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3724 {
3725         int ret;
3726
3727         if (file->f_mode & FMODE_READ)
3728                 ret = seq_lseek(file, offset, whence);
3729         else
3730                 file->f_pos = ret = 0;
3731
3732         return ret;
3733 }
3734
3735 static const struct file_operations tracing_fops = {
3736         .open           = tracing_open,
3737         .read           = seq_read,
3738         .write          = tracing_write_stub,
3739         .llseek         = tracing_lseek,
3740         .release        = tracing_release,
3741 };
3742
3743 static const struct file_operations show_traces_fops = {
3744         .open           = show_traces_open,
3745         .read           = seq_read,
3746         .llseek         = seq_lseek,
3747         .release        = show_traces_release,
3748 };
3749
3750 static ssize_t
3751 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3752                      size_t count, loff_t *ppos)
3753 {
3754         struct trace_array *tr = file_inode(filp)->i_private;
3755         char *mask_str;
3756         int len;
3757
3758         len = snprintf(NULL, 0, "%*pb\n",
3759                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3760         mask_str = kmalloc(len, GFP_KERNEL);
3761         if (!mask_str)
3762                 return -ENOMEM;
3763
3764         len = snprintf(mask_str, len, "%*pb\n",
3765                        cpumask_pr_args(tr->tracing_cpumask));
3766         if (len >= count) {
3767                 count = -EINVAL;
3768                 goto out_err;
3769         }
3770         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3771
3772 out_err:
3773         kfree(mask_str);
3774
3775         return count;
3776 }
3777
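     /*
      * tracing_cpumask takes a hex CPU mask from user space, e.g.
      *   # echo 3 > tracing_cpumask
      * limits tracing to CPUs 0 and 1. For every CPU whose bit flips,
      * the per-cpu disabled counter is adjusted and ring buffer
      * recording is enabled or disabled on that CPU, under tr->max_lock
      * with interrupts off.
      */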
3778 static ssize_t
3779 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3780                       size_t count, loff_t *ppos)
3781 {
3782         struct trace_array *tr = file_inode(filp)->i_private;
3783         cpumask_var_t tracing_cpumask_new;
3784         int err, cpu;
3785
3786         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3787                 return -ENOMEM;
3788
3789         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3790         if (err)
3791                 goto err_unlock;
3792
3793         local_irq_disable();
3794         arch_spin_lock(&tr->max_lock);
3795         for_each_tracing_cpu(cpu) {
3796                 /*
3797                  * Increase/decrease the disabled counter if we are
3798                  * about to flip a bit in the cpumask:
3799                  */
3800                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3801                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3802                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3803                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3804                 }
3805                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3806                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3807                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3808                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3809                 }
3810         }
3811         arch_spin_unlock(&tr->max_lock);
3812         local_irq_enable();
3813
3814         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3815         free_cpumask_var(tracing_cpumask_new);
3816
3817         return count;
3818
3819 err_unlock:
3820         free_cpumask_var(tracing_cpumask_new);
3821
3822         return err;
3823 }
3824
3825 static const struct file_operations tracing_cpumask_fops = {
3826         .open           = tracing_open_generic_tr,
3827         .read           = tracing_cpumask_read,
3828         .write          = tracing_cpumask_write,
3829         .release        = tracing_release_generic_tr,
3830         .llseek         = generic_file_llseek,
3831 };
3832
3833 static int tracing_trace_options_show(struct seq_file *m, void *v)
3834 {
3835         struct tracer_opt *trace_opts;
3836         struct trace_array *tr = m->private;
3837         u32 tracer_flags;
3838         int i;
3839
3840         mutex_lock(&trace_types_lock);
3841         tracer_flags = tr->current_trace->flags->val;
3842         trace_opts = tr->current_trace->flags->opts;
3843
3844         for (i = 0; trace_options[i]; i++) {
3845                 if (tr->trace_flags & (1 << i))
3846                         seq_printf(m, "%s\n", trace_options[i]);
3847                 else
3848                         seq_printf(m, "no%s\n", trace_options[i]);
3849         }
3850
3851         for (i = 0; trace_opts[i].name; i++) {
3852                 if (tracer_flags & trace_opts[i].bit)
3853                         seq_printf(m, "%s\n", trace_opts[i].name);
3854                 else
3855                         seq_printf(m, "no%s\n", trace_opts[i].name);
3856         }
3857         mutex_unlock(&trace_types_lock);
3858
3859         return 0;
3860 }
3861
3862 static int __set_tracer_option(struct trace_array *tr,
3863                                struct tracer_flags *tracer_flags,
3864                                struct tracer_opt *opts, int neg)
3865 {
3866         struct tracer *trace = tracer_flags->trace;
3867         int ret;
3868
3869         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3870         if (ret)
3871                 return ret;
3872
3873         if (neg)
3874                 tracer_flags->val &= ~opts->bit;
3875         else
3876                 tracer_flags->val |= opts->bit;
3877         return 0;
3878 }
3879
3880 /* Try to assign a tracer specific option */
3881 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3882 {
3883         struct tracer *trace = tr->current_trace;
3884         struct tracer_flags *tracer_flags = trace->flags;
3885         struct tracer_opt *opts = NULL;
3886         int i;
3887
3888         for (i = 0; tracer_flags->opts[i].name; i++) {
3889                 opts = &tracer_flags->opts[i];
3890
3891                 if (strcmp(cmp, opts->name) == 0)
3892                         return __set_tracer_option(tr, trace->flags, opts, neg);
3893         }
3894
3895         return -EINVAL;
3896 }
3897
3898 /* Some tracers require overwrite to stay enabled */
3899 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3900 {
3901         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3902                 return -1;
3903
3904         return 0;
3905 }
3906
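     /*
      * Apply one trace option bit: bail out if the flag already has the
      * requested value, let the current tracer veto the change via its
      * flag_changed() hook, then update trace_flags and propagate the
      * side effects (cmdline recording, fork following, ring buffer
      * overwrite mode, trace_printk) where needed.
      */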
3907 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3908 {
3909         /* do nothing if flag is already set */
3910         if (!!(tr->trace_flags & mask) == !!enabled)
3911                 return 0;
3912
3913         /* Give the tracer a chance to approve the change */
3914         if (tr->current_trace->flag_changed)
3915                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3916                         return -EINVAL;
3917
3918         if (enabled)
3919                 tr->trace_flags |= mask;
3920         else
3921                 tr->trace_flags &= ~mask;
3922
3923         if (mask == TRACE_ITER_RECORD_CMD)
3924                 trace_event_enable_cmd_record(enabled);
3925
3926         if (mask == TRACE_ITER_EVENT_FORK)
3927                 trace_event_follow_fork(tr, enabled);
3928
3929         if (mask == TRACE_ITER_OVERWRITE) {
3930                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3931 #ifdef CONFIG_TRACER_MAX_TRACE
3932                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3933 #endif
3934         }
3935
3936         if (mask == TRACE_ITER_PRINTK) {
3937                 trace_printk_start_stop_comm(enabled);
3938                 trace_printk_control(enabled);
3939         }
3940
3941         return 0;
3942 }
3943
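     /*
      * Parse a single option string as written to trace_options (or
      * given on the command line): strip whitespace, treat a leading
      * "no" as negation, try the core trace_options[] names first and
      * fall back to the current tracer's private options, e.g.
      *   # echo nosym-offset > trace_options
      */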
3944 static int trace_set_options(struct trace_array *tr, char *option)
3945 {
3946         char *cmp;
3947         int neg = 0;
3948         int ret = -ENODEV;
3949         int i;
3950         size_t orig_len = strlen(option);
3951
3952         cmp = strstrip(option);
3953
3954         if (strncmp(cmp, "no", 2) == 0) {
3955                 neg = 1;
3956                 cmp += 2;
3957         }
3958
3959         mutex_lock(&trace_types_lock);
3960
3961         for (i = 0; trace_options[i]; i++) {
3962                 if (strcmp(cmp, trace_options[i]) == 0) {
3963                         ret = set_tracer_flag(tr, 1 << i, !neg);
3964                         break;
3965                 }
3966         }
3967
3968         /* If no option could be set, test the specific tracer options */
3969         if (!trace_options[i])
3970                 ret = set_tracer_option(tr, cmp, neg);
3971
3972         mutex_unlock(&trace_types_lock);
3973
3974         /*
3975          * If the first trailing whitespace is replaced with '\0' by strstrip,
3976          * turn it back into a space.
3977          */
3978         if (orig_len > strlen(option))
3979                 option[strlen(option)] = ' ';
3980
3981         return ret;
3982 }
3983
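     /*
      * Walk the comma separated list saved from the trace_options=
      * boot parameter. strsep() replaces each ',' with '\0', so the
      * comma is restored after every option to keep the saved buffer
      * intact for a later pass.
      */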
3984 static void __init apply_trace_boot_options(void)
3985 {
3986         char *buf = trace_boot_options_buf;
3987         char *option;
3988
3989         while (true) {
3990                 option = strsep(&buf, ",");
3991
3992                 if (!option)
3993                         break;
3994
3995                 if (*option)
3996                         trace_set_options(&global_trace, option);
3997
3998                 /* Put back the comma to allow this to be called again */
3999                 if (buf)
4000                         *(buf - 1) = ',';
4001         }
4002 }
4003
4004 static ssize_t
4005 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4006                         size_t cnt, loff_t *ppos)
4007 {
4008         struct seq_file *m = filp->private_data;
4009         struct trace_array *tr = m->private;
4010         char buf[64];
4011         int ret;
4012
4013         if (cnt >= sizeof(buf))
4014                 return -EINVAL;
4015
4016         if (copy_from_user(buf, ubuf, cnt))
4017                 return -EFAULT;
4018
4019         buf[cnt] = 0;
4020
4021         ret = trace_set_options(tr, buf);
4022         if (ret < 0)
4023                 return ret;
4024
4025         *ppos += cnt;
4026
4027         return cnt;
4028 }
4029
4030 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4031 {
4032         struct trace_array *tr = inode->i_private;
4033         int ret;
4034
4035         if (tracing_disabled)
4036                 return -ENODEV;
4037
4038         if (trace_array_get(tr) < 0)
4039                 return -ENODEV;
4040
4041         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4042         if (ret < 0)
4043                 trace_array_put(tr);
4044
4045         return ret;
4046 }
4047
4048 static const struct file_operations tracing_iter_fops = {
4049         .open           = tracing_trace_options_open,
4050         .read           = seq_read,
4051         .llseek         = seq_lseek,
4052         .release        = tracing_single_release_tr,
4053         .write          = tracing_trace_options_write,
4054 };
4055
4056 static const char readme_msg[] =
4057         "tracing mini-HOWTO:\n\n"
4058         "# echo 0 > tracing_on : quick way to disable tracing\n"
4059         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4060         " Important files:\n"
4061         "  trace\t\t\t- The static contents of the buffer\n"
4062         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4063         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4064         "  current_tracer\t- function and latency tracers\n"
4065         "  available_tracers\t- list of configured tracers for current_tracer\n"
4066         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4067         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4068         "  trace_clock\t\t- change the clock used to order events\n"
4069         "       local:   Per cpu clock but may not be synced across CPUs\n"
4070         "      global:   Synced across CPUs but slows tracing down.\n"
4071         "     counter:   Not a clock, but just an increment\n"
4072         "      uptime:   Jiffy counter from time of boot\n"
4073         "        perf:   Same clock that perf events use\n"
4074 #ifdef CONFIG_X86_64
4075         "     x86-tsc:   TSC cycle counter\n"
4076 #endif
4077         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4078         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4079         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4080         "\t\t\t  Remove sub-buffer with rmdir\n"
4081         "  trace_options\t\t- Set format or modify how tracing happens\n"
4082         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4083         "\t\t\t  option name\n"
4084         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4085 #ifdef CONFIG_DYNAMIC_FTRACE
4086         "\n  available_filter_functions - list of functions that can be filtered on\n"
4087         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4088         "\t\t\t  functions\n"
4089         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4090         "\t     modules: Can select a group via module\n"
4091         "\t      Format: :mod:<module-name>\n"
4092         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4093         "\t    triggers: a command to perform when function is hit\n"
4094         "\t      Format: <function>:<trigger>[:count]\n"
4095         "\t     trigger: traceon, traceoff\n"
4096         "\t\t      enable_event:<system>:<event>\n"
4097         "\t\t      disable_event:<system>:<event>\n"
4098 #ifdef CONFIG_STACKTRACE
4099         "\t\t      stacktrace\n"
4100 #endif
4101 #ifdef CONFIG_TRACER_SNAPSHOT
4102         "\t\t      snapshot\n"
4103 #endif
4104         "\t\t      dump\n"
4105         "\t\t      cpudump\n"
4106         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4107         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4108         "\t     The first one will disable tracing every time do_fault is hit\n"
4109         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4110         "\t       The first time do_trap is hit and it disables tracing, the\n"
4111         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4112         "\t       the counter will not decrement. It only decrements when the\n"
4113         "\t       trigger did work\n"
4114         "\t     To remove trigger without count:\n"
4115         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4116         "\t     To remove trigger with a count:\n"
4117         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4118         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4119         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4120         "\t    modules: Can select a group via module command :mod:\n"
4121         "\t    Does not accept triggers\n"
4122 #endif /* CONFIG_DYNAMIC_FTRACE */
4123 #ifdef CONFIG_FUNCTION_TRACER
4124         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4125         "\t\t    (function)\n"
4126 #endif
4127 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4128         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4129         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4130         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4131 #endif
4132 #ifdef CONFIG_TRACER_SNAPSHOT
4133         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4134         "\t\t\t  snapshot buffer. Read the contents for more\n"
4135         "\t\t\t  information\n"
4136 #endif
4137 #ifdef CONFIG_STACK_TRACER
4138         "  stack_trace\t\t- Shows the max stack trace when active\n"
4139         "  stack_max_size\t- Shows current max stack size that was traced\n"
4140         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4141         "\t\t\t  new trace)\n"
4142 #ifdef CONFIG_DYNAMIC_FTRACE
4143         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4144         "\t\t\t  traces\n"
4145 #endif
4146 #endif /* CONFIG_STACK_TRACER */
4147 #ifdef CONFIG_KPROBE_EVENT
4148         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4149         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4150 #endif
4151 #ifdef CONFIG_UPROBE_EVENT
4152         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4153         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4154 #endif
4155 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4156         "\t  accepts: event-definitions (one definition per line)\n"
4157         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4158         "\t           -:[<group>/]<event>\n"
4159 #ifdef CONFIG_KPROBE_EVENT
4160         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4161 #endif
4162 #ifdef CONFIG_UPROBE_EVENT
4163         "\t    place: <path>:<offset>\n"
4164 #endif
4165         "\t     args: <name>=fetcharg[:type]\n"
4166         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4167         "\t           $stack<index>, $stack, $retval, $comm\n"
4168         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4169         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4170 #endif
4171         "  events/\t\t- Directory containing all trace event subsystems:\n"
4172         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4173         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4174         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4175         "\t\t\t  events\n"
4176         "      filter\t\t- If set, only events passing filter are traced\n"
4177         "  events/<system>/<event>/\t- Directory containing control files for\n"
4178         "\t\t\t  <event>:\n"
4179         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4180         "      filter\t\t- If set, only events passing filter are traced\n"
4181         "      trigger\t\t- If set, a command to perform when event is hit\n"
4182         "\t    Format: <trigger>[:count][if <filter>]\n"
4183         "\t   trigger: traceon, traceoff\n"
4184         "\t            enable_event:<system>:<event>\n"
4185         "\t            disable_event:<system>:<event>\n"
4186 #ifdef CONFIG_HIST_TRIGGERS
4187         "\t            enable_hist:<system>:<event>\n"
4188         "\t            disable_hist:<system>:<event>\n"
4189 #endif
4190 #ifdef CONFIG_STACKTRACE
4191         "\t\t    stacktrace\n"
4192 #endif
4193 #ifdef CONFIG_TRACER_SNAPSHOT
4194         "\t\t    snapshot\n"
4195 #endif
4196 #ifdef CONFIG_HIST_TRIGGERS
4197         "\t\t    hist (see below)\n"
4198 #endif
4199         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4200         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4201         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4202         "\t                  events/block/block_unplug/trigger\n"
4203         "\t   The first disables tracing every time block_unplug is hit.\n"
4204         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4205         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4206         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4207         "\t   Like function triggers, the counter is only decremented if it\n"
4208         "\t    enabled or disabled tracing.\n"
4209         "\t   To remove a trigger without a count:\n"
4210         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4211         "\t   To remove a trigger with a count:\n"
4212         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4213         "\t   Filters can be ignored when removing a trigger.\n"
4214 #ifdef CONFIG_HIST_TRIGGERS
4215         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4216         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4217         "\t            [:values=<field1[,field2,...]>]\n"
4218         "\t            [:sort=<field1[,field2,...]>]\n"
4219         "\t            [:size=#entries]\n"
4220         "\t            [:pause][:continue][:clear]\n"
4221         "\t            [:name=histname1]\n"
4222         "\t            [if <filter>]\n\n"
4223         "\t    When a matching event is hit, an entry is added to a hash\n"
4224         "\t    table using the key(s) and value(s) named, and the value of a\n"
4225         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4226         "\t    correspond to fields in the event's format description.  Keys\n"
4227         "\t    can be any field, or the special string 'stacktrace'.\n"
4228         "\t    Compound keys consisting of up to two fields can be specified\n"
4229         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4230         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4231         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4232         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4233         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4234         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4235         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4236         "\t    its histogram data will be shared with other triggers of the\n"
4237         "\t    same name, and trigger hits will update this common data.\n\n"
4238         "\t    Reading the 'hist' file for the event will dump the hash\n"
4239         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4240         "\t    triggers attached to an event, there will be a table for each\n"
4241         "\t    trigger in the output.  The table displayed for a named\n"
4242         "\t    trigger will be the same as any other instance having the\n"
4243         "\t    same name.  The default format used to display a given field\n"
4244         "\t    can be modified by appending any of the following modifiers\n"
4245         "\t    to the field name, as applicable:\n\n"
4246         "\t            .hex        display a number as a hex value\n"
4247         "\t            .sym        display an address as a symbol\n"
4248         "\t            .sym-offset display an address as a symbol and offset\n"
4249         "\t            .execname   display a common_pid as a program name\n"
4250         "\t            .syscall    display a syscall id as a syscall name\n"
4251         "\t            .log2       display log2 value rather than raw number\n\n"
4252         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4253         "\t    trigger or to start a hist trigger but not log any events\n"
4254         "\t    until told to do so.  'continue' can be used to start or\n"
4255         "\t    restart a paused hist trigger.\n\n"
4256         "\t    The 'clear' parameter will clear the contents of a running\n"
4257         "\t    hist trigger and leave its current paused/active state\n"
4258         "\t    unchanged.\n\n"
4259         "\t    The enable_hist and disable_hist triggers can be used to\n"
4260         "\t    have one event conditionally start and stop another event's\n"
4261         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4262         "\t    the enable_event and disable_event triggers.\n"
4263 #endif
4264 ;
4265
4266 static ssize_t
4267 tracing_readme_read(struct file *filp, char __user *ubuf,
4268                        size_t cnt, loff_t *ppos)
4269 {
4270         return simple_read_from_buffer(ubuf, cnt, ppos,
4271                                         readme_msg, strlen(readme_msg));
4272 }
4273
4274 static const struct file_operations tracing_readme_fops = {
4275         .open           = tracing_open_generic,
4276         .read           = tracing_readme_read,
4277         .llseek         = generic_file_llseek,
4278 };
4279
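     /*
      * seq_file iterator for saved_cmdlines: walk the
      * map_cmdline_to_pid[] array under trace_cmdline_lock, skipping
      * unused slots, and let ->show() resolve each recorded pid back to
      * its comm via __trace_find_cmdline().
      */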
4280 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4281 {
4282         unsigned int *ptr = v;
4283
4284         if (*pos || m->count)
4285                 ptr++;
4286
4287         (*pos)++;
4288
4289         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4290              ptr++) {
4291                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4292                         continue;
4293
4294                 return ptr;
4295         }
4296
4297         return NULL;
4298 }
4299
4300 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4301 {
4302         void *v;
4303         loff_t l = 0;
4304
4305         preempt_disable();
4306         arch_spin_lock(&trace_cmdline_lock);
4307
4308         v = &savedcmd->map_cmdline_to_pid[0];
4309         while (l <= *pos) {
4310                 v = saved_cmdlines_next(m, v, &l);
4311                 if (!v)
4312                         return NULL;
4313         }
4314
4315         return v;
4316 }
4317
4318 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4319 {
4320         arch_spin_unlock(&trace_cmdline_lock);
4321         preempt_enable();
4322 }
4323
4324 static int saved_cmdlines_show(struct seq_file *m, void *v)
4325 {
4326         char buf[TASK_COMM_LEN];
4327         unsigned int *pid = v;
4328
4329         __trace_find_cmdline(*pid, buf);
4330         seq_printf(m, "%d %s\n", *pid, buf);
4331         return 0;
4332 }
4333
4334 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4335         .start          = saved_cmdlines_start,
4336         .next           = saved_cmdlines_next,
4337         .stop           = saved_cmdlines_stop,
4338         .show           = saved_cmdlines_show,
4339 };
4340
4341 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4342 {
4343         if (tracing_disabled)
4344                 return -ENODEV;
4345
4346         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4347 }
4348
4349 static const struct file_operations tracing_saved_cmdlines_fops = {
4350         .open           = tracing_saved_cmdlines_open,
4351         .read           = seq_read,
4352         .llseek         = seq_lseek,
4353         .release        = seq_release,
4354 };
4355
4356 static ssize_t
4357 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4358                                  size_t cnt, loff_t *ppos)
4359 {
4360         char buf[64];
4361         int r;
4362
4363         arch_spin_lock(&trace_cmdline_lock);
4364         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4365         arch_spin_unlock(&trace_cmdline_lock);
4366
4367         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4368 }
4369
4370 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4371 {
4372         kfree(s->saved_cmdlines);
4373         kfree(s->map_cmdline_to_pid);
4374         kfree(s);
4375 }
4376
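     /*
      * Called when user space writes a new size (1..PID_MAX_DEFAULT) to
      * saved_cmdlines_size. The replacement buffer is allocated up
      * front (GFP_KERNEL may sleep), only the savedcmd pointer swap is
      * done under trace_cmdline_lock, and the old buffer is freed once
      * the lock is dropped.
      */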
4377 static int tracing_resize_saved_cmdlines(unsigned int val)
4378 {
4379         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4380
4381         s = kmalloc(sizeof(*s), GFP_KERNEL);
4382         if (!s)
4383                 return -ENOMEM;
4384
4385         if (allocate_cmdlines_buffer(val, s) < 0) {
4386                 kfree(s);
4387                 return -ENOMEM;
4388         }
4389
4390         arch_spin_lock(&trace_cmdline_lock);
4391         savedcmd_temp = savedcmd;
4392         savedcmd = s;
4393         arch_spin_unlock(&trace_cmdline_lock);
4394         free_saved_cmdlines_buffer(savedcmd_temp);
4395
4396         return 0;
4397 }
4398
4399 static ssize_t
4400 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4401                                   size_t cnt, loff_t *ppos)
4402 {
4403         unsigned long val;
4404         int ret;
4405
4406         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4407         if (ret)
4408                 return ret;
4409
4410         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4411         if (!val || val > PID_MAX_DEFAULT)
4412                 return -EINVAL;
4413
4414         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4415         if (ret < 0)
4416                 return ret;
4417
4418         *ppos += cnt;
4419
4420         return cnt;
4421 }
4422
4423 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4424         .open           = tracing_open_generic,
4425         .read           = tracing_saved_cmdlines_size_read,
4426         .write          = tracing_saved_cmdlines_size_write,
4427 };
4428
4429 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4430 static union trace_enum_map_item *
4431 update_enum_map(union trace_enum_map_item *ptr)
4432 {
4433         if (!ptr->map.enum_string) {
4434                 if (ptr->tail.next) {
4435                         ptr = ptr->tail.next;
4436                         /* Set ptr to the next real item (skip head) */
4437                         ptr++;
4438                 } else
4439                         return NULL;
4440         }
4441         return ptr;
4442 }
4443
4444 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4445 {
4446         union trace_enum_map_item *ptr = v;
4447
4448         /*
4449          * Paranoid! If ptr points to end, we don't want to increment past it.
4450          * This really should never happen.
4451          */
4452         ptr = update_enum_map(ptr);
4453         if (WARN_ON_ONCE(!ptr))
4454                 return NULL;
4455
4456         ptr++;
4457
4458         (*pos)++;
4459
4460         ptr = update_enum_map(ptr);
4461
4462         return ptr;
4463 }
4464
4465 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4466 {
4467         union trace_enum_map_item *v;
4468         loff_t l = 0;
4469
4470         mutex_lock(&trace_enum_mutex);
4471
4472         v = trace_enum_maps;
4473         if (v)
4474                 v++;
4475
4476         while (v && l < *pos) {
4477                 v = enum_map_next(m, v, &l);
4478         }
4479
4480         return v;
4481 }
4482
4483 static void enum_map_stop(struct seq_file *m, void *v)
4484 {
4485         mutex_unlock(&trace_enum_mutex);
4486 }
4487
4488 static int enum_map_show(struct seq_file *m, void *v)
4489 {
4490         union trace_enum_map_item *ptr = v;
4491
4492         seq_printf(m, "%s %ld (%s)\n",
4493                    ptr->map.enum_string, ptr->map.enum_value,
4494                    ptr->map.system);
4495
4496         return 0;
4497 }
4498
4499 static const struct seq_operations tracing_enum_map_seq_ops = {
4500         .start          = enum_map_start,
4501         .next           = enum_map_next,
4502         .stop           = enum_map_stop,
4503         .show           = enum_map_show,
4504 };
4505
4506 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4507 {
4508         if (tracing_disabled)
4509                 return -ENODEV;
4510
4511         return seq_open(filp, &tracing_enum_map_seq_ops);
4512 }
4513
4514 static const struct file_operations tracing_enum_map_fops = {
4515         .open           = tracing_enum_map_open,
4516         .read           = seq_read,
4517         .llseek         = seq_lseek,
4518         .release        = seq_release,
4519 };
4520
4521 static inline union trace_enum_map_item *
4522 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4523 {
4524         /* Return tail of array given the head */
4525         return ptr + ptr->head.length + 1;
4526 }
4527
4528 static void
4529 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4530                            int len)
4531 {
4532         struct trace_enum_map **stop;
4533         struct trace_enum_map **map;
4534         union trace_enum_map_item *map_array;
4535         union trace_enum_map_item *ptr;
4536
4537         stop = start + len;
4538
4539         /*
4540          * The trace_enum_maps contains the map plus a head and tail item,
4541          * where the head holds the module and length of array, and the
4542          * tail holds a pointer to the next list.
4543          */
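             /*
              * Layout of map_array:
              *   [0]           head (mod, length = len)
              *   [1 .. len]    one copied trace_enum_map per entry
              *   [len + 1]     tail (next pointer, zeroed below)
              */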
4544         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4545         if (!map_array) {
4546                 pr_warn("Unable to allocate trace enum mapping\n");
4547                 return;
4548         }
4549
4550         mutex_lock(&trace_enum_mutex);
4551
4552         if (!trace_enum_maps)
4553                 trace_enum_maps = map_array;
4554         else {
4555                 ptr = trace_enum_maps;
4556                 for (;;) {
4557                         ptr = trace_enum_jmp_to_tail(ptr);
4558                         if (!ptr->tail.next)
4559                                 break;
4560                         ptr = ptr->tail.next;
4561
4562                 }
4563                 ptr->tail.next = map_array;
4564         }
4565         map_array->head.mod = mod;
4566         map_array->head.length = len;
4567         map_array++;
4568
4569         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4570                 map_array->map = **map;
4571                 map_array++;
4572         }
4573         memset(map_array, 0, sizeof(*map_array));
4574
4575         mutex_unlock(&trace_enum_mutex);
4576 }
4577
4578 static void trace_create_enum_file(struct dentry *d_tracer)
4579 {
4580         trace_create_file("enum_map", 0444, d_tracer,
4581                           NULL, &tracing_enum_map_fops);
4582 }
4583
4584 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4585 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4586 static inline void trace_insert_enum_map_file(struct module *mod,
4587                               struct trace_enum_map **start, int len) { }
4588 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4589
4590 static void trace_insert_enum_map(struct module *mod,
4591                                   struct trace_enum_map **start, int len)
4592 {
4593         struct trace_enum_map **map;
4594
4595         if (len <= 0)
4596                 return;
4597
4598         map = start;
4599
4600         trace_event_enum_update(map, len);
4601
4602         trace_insert_enum_map_file(mod, start, len);
4603 }
4604
4605 static ssize_t
4606 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4607                        size_t cnt, loff_t *ppos)
4608 {
4609         struct trace_array *tr = filp->private_data;
4610         char buf[MAX_TRACER_SIZE+2];
4611         int r;
4612
4613         mutex_lock(&trace_types_lock);
4614         r = sprintf(buf, "%s\n", tr->current_trace->name);
4615         mutex_unlock(&trace_types_lock);
4616
4617         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4618 }
4619
4620 int tracer_init(struct tracer *t, struct trace_array *tr)
4621 {
4622         tracing_reset_online_cpus(&tr->trace_buffer);
4623         return t->init(tr);
4624 }
4625
4626 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4627 {
4628         int cpu;
4629
4630         for_each_tracing_cpu(cpu)
4631                 per_cpu_ptr(buf->data, cpu)->entries = val;
4632 }
4633
4634 #ifdef CONFIG_TRACER_MAX_TRACE
4635 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4636 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4637                                         struct trace_buffer *size_buf, int cpu_id)
4638 {
4639         int cpu, ret = 0;
4640
4641         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4642                 for_each_tracing_cpu(cpu) {
4643                         ret = ring_buffer_resize(trace_buf->buffer,
4644                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4645                         if (ret < 0)
4646                                 break;
4647                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4648                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4649                 }
4650         } else {
4651                 ret = ring_buffer_resize(trace_buf->buffer,
4652                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4653                 if (ret == 0)
4654                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4655                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4656         }
4657
4658         return ret;
4659 }
4660 #endif /* CONFIG_TRACER_MAX_TRACE */
4661
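     /*
      * Resize the main trace buffer first. If the current tracer also
      * uses the max (snapshot) buffer, resize that as well; should the
      * max buffer resize fail, try to shrink the main buffer back so
      * both stay the same size, and disable tracing entirely if even
      * that fails.
      */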
4662 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4663                                         unsigned long size, int cpu)
4664 {
4665         int ret;
4666
4667         /*
4668          * If kernel or user changes the size of the ring buffer
4669          * we use the size that was given, and we can forget about
4670          * expanding it later.
4671          */
4672         ring_buffer_expanded = true;
4673
4674         /* May be called before buffers are initialized */
4675         if (!tr->trace_buffer.buffer)
4676                 return 0;
4677
4678         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4679         if (ret < 0)
4680                 return ret;
4681
4682 #ifdef CONFIG_TRACER_MAX_TRACE
4683         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4684             !tr->current_trace->use_max_tr)
4685                 goto out;
4686
4687         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4688         if (ret < 0) {
4689                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4690                                                      &tr->trace_buffer, cpu);
4691                 if (r < 0) {
4692                         /*
4693                          * AARGH! We are left with a differently
4694                          * sized max buffer!!!!
4695                          * The max buffer is our "snapshot" buffer.
4696                          * When a tracer needs a snapshot (one of the
4697                          * latency tracers), it swaps the max buffer
4698                          * with the saved snapshot. We succeeded in
4699                          * updating the size of the main buffer, but failed to
4700                          * update the size of the max buffer. But when we tried
4701                          * to reset the main buffer to the original size, we
4702                          * failed there too. This is very unlikely to
4703                          * happen, but if it does, warn and kill all
4704                          * tracing.
4705                          */
4706                         WARN_ON(1);
4707                         tracing_disabled = 1;
4708                 }
4709                 return ret;
4710         }
4711
4712         if (cpu == RING_BUFFER_ALL_CPUS)
4713                 set_buffer_entries(&tr->max_buffer, size);
4714         else
4715                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4716
4717  out:
4718 #endif /* CONFIG_TRACER_MAX_TRACE */
4719
4720         if (cpu == RING_BUFFER_ALL_CPUS)
4721                 set_buffer_entries(&tr->trace_buffer, size);
4722         else
4723                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4724
4725         return ret;
4726 }
4727
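/*
 * Locked wrapper around __tracing_resize_ring_buffer(): checks that
 * @cpu_id is part of tracing_buffer_mask (unless it is
 * RING_BUFFER_ALL_CPUS) and performs the resize under trace_types_lock.
 */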
4728 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4729                                           unsigned long size, int cpu_id)
4730 {
4731         int ret = size;
4732
4733         mutex_lock(&trace_types_lock);
4734
4735         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4736                 /* make sure this CPU is enabled in the mask */
4737                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4738                         ret = -EINVAL;
4739                         goto out;
4740                 }
4741         }
4742
4743         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4744         if (ret < 0)
4745                 ret = -ENOMEM;
4746
4747 out:
4748         mutex_unlock(&trace_types_lock);
4749
4750         return ret;
4751 }
4752
4753
4754 /**
4755  * tracing_update_buffers - used by tracing facility to expand ring buffers
4756  *
4757  * To save memory on systems that have tracing configured in but never
4758  * use it, the ring buffers are initially set to a minimum size. Once a
4759  * user starts to use the tracing facility, they need to grow to their
4760  * default size.
4761  *
4762  * This function is to be called when a tracer is about to be used.
4763  */
4764 int tracing_update_buffers(void)
4765 {
4766         int ret = 0;
4767
4768         mutex_lock(&trace_types_lock);
4769         if (!ring_buffer_expanded)
4770                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4771                                                 RING_BUFFER_ALL_CPUS);
4772         mutex_unlock(&trace_types_lock);
4773
4774         return ret;
4775 }
4776
4777 struct trace_option_dentry;
4778
4779 static void
4780 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4781
4782 /*
4783  * Used to clear out the tracer before deletion of an instance.
4784  * Must have trace_types_lock held.
4785  */
4786 static void tracing_set_nop(struct trace_array *tr)
4787 {
4788         if (tr->current_trace == &nop_trace)
4789                 return;
4790
4791         tr->current_trace->enabled--;
4792
4793         if (tr->current_trace->reset)
4794                 tr->current_trace->reset(tr);
4795
4796         tr->current_trace = &nop_trace;
4797 }
4798
4799 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4800 {
4801         /* Only enable if the directory has been created already. */
4802         if (!tr->dir)
4803                 return;
4804
4805         create_trace_option_files(tr, t);
4806 }
4807
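/*
 * Switch @tr to the tracer named @buf. The old tracer is torn down and
 * replaced by nop_trace before the new one is initialized, the snapshot
 * (max) buffer is allocated or freed to match the new tracer's
 * use_max_tr setting, and the change is refused with -EBUSY while
 * trace_pipe readers hold a reference on the current tracer.
 */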
4808 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4809 {
4810         struct tracer *t;
4811 #ifdef CONFIG_TRACER_MAX_TRACE
4812         bool had_max_tr;
4813 #endif
4814         int ret = 0;
4815
4816         mutex_lock(&trace_types_lock);
4817
4818         if (!ring_buffer_expanded) {
4819                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4820                                                 RING_BUFFER_ALL_CPUS);
4821                 if (ret < 0)
4822                         goto out;
4823                 ret = 0;
4824         }
4825
4826         for (t = trace_types; t; t = t->next) {
4827                 if (strcmp(t->name, buf) == 0)
4828                         break;
4829         }
4830         if (!t) {
4831                 ret = -EINVAL;
4832                 goto out;
4833         }
4834         if (t == tr->current_trace)
4835                 goto out;
4836
4837         /* Some tracers are only allowed for the top level buffer */
4838         if (!trace_ok_for_array(t, tr)) {
4839                 ret = -EINVAL;
4840                 goto out;
4841         }
4842
4843         /* If trace pipe files are being read, we can't change the tracer */
4844         if (tr->current_trace->ref) {
4845                 ret = -EBUSY;
4846                 goto out;
4847         }
4848
4849         trace_branch_disable();
4850
4851         tr->current_trace->enabled--;
4852
4853         if (tr->current_trace->reset)
4854                 tr->current_trace->reset(tr);
4855
4856         /* Current trace needs to be nop_trace before synchronize_sched */
4857         tr->current_trace = &nop_trace;
4858
4859 #ifdef CONFIG_TRACER_MAX_TRACE
4860         had_max_tr = tr->allocated_snapshot;
4861
4862         if (had_max_tr && !t->use_max_tr) {
4863                 /*
4864                  * We need to make sure that update_max_tr() sees that
4865                  * current_trace changed to nop_trace to keep it from
4866                  * swapping the buffers after we resize it.
4867                  * update_max_tr() is called with interrupts disabled,
4868                  * so a synchronize_sched() is sufficient.
4869                  */
4870                 synchronize_sched();
4871                 free_snapshot(tr);
4872         }
4873 #endif
4874
4875 #ifdef CONFIG_TRACER_MAX_TRACE
4876         if (t->use_max_tr && !had_max_tr) {
4877                 ret = alloc_snapshot(tr);
4878                 if (ret < 0)
4879                         goto out;
4880         }
4881 #endif
4882
4883         if (t->init) {
4884                 ret = tracer_init(t, tr);
4885                 if (ret)
4886                         goto out;
4887         }
4888
4889         tr->current_trace = t;
4890         tr->current_trace->enabled++;
4891         trace_branch_enable(tr);
4892  out:
4893         mutex_unlock(&trace_types_lock);
4894
4895         return ret;
4896 }
4897
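/*
 * Write handler for the tracefs "current_tracer" file: copies the tracer
 * name from userspace, strips trailing whitespace and hands it to
 * tracing_set_tracer().
 */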
4898 static ssize_t
4899 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4900                         size_t cnt, loff_t *ppos)
4901 {
4902         struct trace_array *tr = filp->private_data;
4903         char buf[MAX_TRACER_SIZE+1];
4904         int i;
4905         size_t ret;
4906         int err;
4907
4908         ret = cnt;
4909
4910         if (cnt > MAX_TRACER_SIZE)
4911                 cnt = MAX_TRACER_SIZE;
4912
4913         if (copy_from_user(buf, ubuf, cnt))
4914                 return -EFAULT;
4915
4916         buf[cnt] = 0;
4917
4918         /* strip trailing whitespace. */
4919         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4920                 buf[i] = 0;
4921
4922         err = tracing_set_tracer(tr, buf);
4923         if (err)
4924                 return err;
4925
4926         *ppos += ret;
4927
4928         return ret;
4929 }
4930
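/*
 * Helpers for files that store a latency value in nanoseconds but present
 * it to userspace in microseconds: the read side converts nsecs to usecs
 * (printing -1 for "unset"), the write side parses a usecs value and
 * stores it multiplied by 1000.
 */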
4931 static ssize_t
4932 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4933                    size_t cnt, loff_t *ppos)
4934 {
4935         char buf[64];
4936         int r;
4937
4938         r = snprintf(buf, sizeof(buf), "%ld\n",
4939                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4940         if (r > sizeof(buf))
4941                 r = sizeof(buf);
4942         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4943 }
4944
4945 static ssize_t
4946 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4947                     size_t cnt, loff_t *ppos)
4948 {
4949         unsigned long val;
4950         int ret;
4951
4952         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4953         if (ret)
4954                 return ret;
4955
4956         *ptr = val * 1000;
4957
4958         return cnt;
4959 }
4960
4961 static ssize_t
4962 tracing_thresh_read(struct file *filp, char __user *ubuf,
4963                     size_t cnt, loff_t *ppos)
4964 {
4965         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4966 }
4967
4968 static ssize_t
4969 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4970                      size_t cnt, loff_t *ppos)
4971 {
4972         struct trace_array *tr = filp->private_data;
4973         int ret;
4974
4975         mutex_lock(&trace_types_lock);
4976         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4977         if (ret < 0)
4978                 goto out;
4979
4980         if (tr->current_trace->update_thresh) {
4981                 ret = tr->current_trace->update_thresh(tr);
4982                 if (ret < 0)
4983                         goto out;
4984         }
4985
4986         ret = cnt;
4987 out:
4988         mutex_unlock(&trace_types_lock);
4989
4990         return ret;
4991 }
4992
4993 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4994
4995 static ssize_t
4996 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4997                      size_t cnt, loff_t *ppos)
4998 {
4999         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5000 }
5001
5002 static ssize_t
5003 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5004                       size_t cnt, loff_t *ppos)
5005 {
5006         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5007 }
5008
5009 #endif
5010
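/*
 * Open handler for the trace_pipe file: allocates a private trace_iterator
 * for this reader, pins the trace_array and bumps current_trace->ref so
 * the tracer cannot be changed while the pipe is open (see the -EBUSY
 * check in tracing_set_tracer()).
 */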
5011 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5012 {
5013         struct trace_array *tr = inode->i_private;
5014         struct trace_iterator *iter;
5015         int ret = 0;
5016
5017         if (tracing_disabled)
5018                 return -ENODEV;
5019
5020         if (trace_array_get(tr) < 0)
5021                 return -ENODEV;
5022
5023         mutex_lock(&trace_types_lock);
5024
5025         /* create a buffer to store the information to pass to userspace */
5026         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5027         if (!iter) {
5028                 ret = -ENOMEM;
5029                 __trace_array_put(tr);
5030                 goto out;
5031         }
5032
5033         trace_seq_init(&iter->seq);
5034         iter->trace = tr->current_trace;
5035
5036         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5037                 ret = -ENOMEM;
5038                 goto fail;
5039         }
5040
5041         /* trace pipe does not show start of buffer */
5042         cpumask_setall(iter->started);
5043
5044         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5045                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5046
5047         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5048         if (trace_clocks[tr->clock_id].in_ns)
5049                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5050
5051         iter->tr = tr;
5052         iter->trace_buffer = &tr->trace_buffer;
5053         iter->cpu_file = tracing_get_cpu(inode);
5054         mutex_init(&iter->mutex);
5055         filp->private_data = iter;
5056
5057         if (iter->trace->pipe_open)
5058                 iter->trace->pipe_open(iter);
5059
5060         nonseekable_open(inode, filp);
5061
5062         tr->current_trace->ref++;
5063 out:
5064         mutex_unlock(&trace_types_lock);
5065         return ret;
5066
5067 fail:
5068         kfree(iter);
5069         __trace_array_put(tr);
5070         mutex_unlock(&trace_types_lock);
5071         return ret;
5072 }
5073
5074 static int tracing_release_pipe(struct inode *inode, struct file *file)
5075 {
5076         struct trace_iterator *iter = file->private_data;
5077         struct trace_array *tr = inode->i_private;
5078
5079         mutex_lock(&trace_types_lock);
5080
5081         tr->current_trace->ref--;
5082
5083         if (iter->trace->pipe_close)
5084                 iter->trace->pipe_close(iter);
5085
5086         mutex_unlock(&trace_types_lock);
5087
5088         free_cpumask_var(iter->started);
5089         mutex_destroy(&iter->mutex);
5090         kfree(iter);
5091
5092         trace_array_put(tr);
5093
5094         return 0;
5095 }
5096
5097 static unsigned int
5098 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5099 {
5100         struct trace_array *tr = iter->tr;
5101
5102         /* Iterators are static; they should either be filled or empty */
5103         if (trace_buffer_iter(iter, iter->cpu_file))
5104                 return POLLIN | POLLRDNORM;
5105
5106         if (tr->trace_flags & TRACE_ITER_BLOCK)
5107                 /*
5108                  * Always select as readable when in blocking mode
5109                  */
5110                 return POLLIN | POLLRDNORM;
5111         else
5112                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5113                                              filp, poll_table);
5114 }
5115
5116 static unsigned int
5117 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5118 {
5119         struct trace_iterator *iter = filp->private_data;
5120
5121         return trace_poll(iter, filp, poll_table);
5122 }
5123
5124 /* Must be called with iter->mutex held. */
5125 static int tracing_wait_pipe(struct file *filp)
5126 {
5127         struct trace_iterator *iter = filp->private_data;
5128         int ret;
5129
5130         while (trace_empty(iter)) {
5131
5132                 if ((filp->f_flags & O_NONBLOCK)) {
5133                         return -EAGAIN;
5134                 }
5135
5136                 /*
5137                  * We block until we have read something and tracing has been
5138                  * disabled. We still block if tracing is disabled but we have
5139                  * never read anything; this allows a user to cat this file and
5140                  * then enable tracing. But once we have read something, we
5141                  * return EOF whenever tracing is disabled again.
5142                  *
5143                  * iter->pos will be 0 if we haven't read anything.
5144                  */
5145                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5146                         break;
5147
5148                 mutex_unlock(&iter->mutex);
5149
5150                 ret = wait_on_pipe(iter, false);
5151
5152                 mutex_lock(&iter->mutex);
5153
5154                 if (ret)
5155                         return ret;
5156         }
5157
5158         return 1;
5159 }
5160
5161 /*
5162  * Consumer reader.
5163  */
5164 static ssize_t
5165 tracing_read_pipe(struct file *filp, char __user *ubuf,
5166                   size_t cnt, loff_t *ppos)
5167 {
5168         struct trace_iterator *iter = filp->private_data;
5169         ssize_t sret;
5170
5171         /*
5172          * Avoid more than one consumer on a single file descriptor.
5173          * This is just a matter of trace coherency; the ring buffer itself
5174          * is protected.
5175          */
5176         mutex_lock(&iter->mutex);
5177
5178         /* return any leftover data */
5179         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5180         if (sret != -EBUSY)
5181                 goto out;
5182
5183         trace_seq_init(&iter->seq);
5184
5185         if (iter->trace->read) {
5186                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5187                 if (sret)
5188                         goto out;
5189         }
5190
5191 waitagain:
5192         sret = tracing_wait_pipe(filp);
5193         if (sret <= 0)
5194                 goto out;
5195
5196         /* stop when tracing is finished */
5197         if (trace_empty(iter)) {
5198                 sret = 0;
5199                 goto out;
5200         }
5201
5202         if (cnt >= PAGE_SIZE)
5203                 cnt = PAGE_SIZE - 1;
5204
5205         /* reset all but tr, trace, and overruns */
5206         memset(&iter->seq, 0,
5207                sizeof(struct trace_iterator) -
5208                offsetof(struct trace_iterator, seq));
5209         cpumask_clear(iter->started);
5210         trace_seq_init(&iter->seq);
5211         iter->pos = -1;
5212
5213         trace_event_read_lock();
5214         trace_access_lock(iter->cpu_file);
5215         while (trace_find_next_entry_inc(iter) != NULL) {
5216                 enum print_line_t ret;
5217                 int save_len = iter->seq.seq.len;
5218
5219                 ret = print_trace_line(iter);
5220                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5221                         /* don't print partial lines */
5222                         iter->seq.seq.len = save_len;
5223                         break;
5224                 }
5225                 if (ret != TRACE_TYPE_NO_CONSUME)
5226                         trace_consume(iter);
5227
5228                 if (trace_seq_used(&iter->seq) >= cnt)
5229                         break;
5230
5231                 /*
5232                  * Setting the full flag means we reached the trace_seq buffer
5233                  * size and should have left via the partial-line condition above.
5234                  * One of the trace_seq_* functions is not being used properly.
5235                  */
5236                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5237                           iter->ent->type);
5238         }
5239         trace_access_unlock(iter->cpu_file);
5240         trace_event_read_unlock();
5241
5242         /* Now copy what we have to the user */
5243         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5244         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5245                 trace_seq_init(&iter->seq);
5246
5247         /*
5248          * If there was nothing to send to the user, despite having consumed
5249          * trace entries, go back and wait for more entries.
5250          */
5251         if (sret == -EBUSY)
5252                 goto waitagain;
5253
5254 out:
5255         mutex_unlock(&iter->mutex);
5256
5257         return sret;
5258 }
5259
5260 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5261                                      unsigned int idx)
5262 {
5263         __free_page(spd->pages[idx]);
5264 }
5265
5266 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5267         .can_merge              = 0,
5268         .confirm                = generic_pipe_buf_confirm,
5269         .release                = generic_pipe_buf_release,
5270         .steal                  = generic_pipe_buf_steal,
5271         .get                    = generic_pipe_buf_get,
5272 };
5273
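/*
 * Fill iter->seq with as many fully formatted trace lines as fit within
 * @rem bytes, consuming entries as they are printed. Returns the number
 * of bytes still available in the splice request; 0 means the budget for
 * the current page is exhausted or the buffer ran out of entries.
 */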
5274 static size_t
5275 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5276 {
5277         size_t count;
5278         int save_len;
5279         int ret;
5280
5281         /* Seq buffer is page-sized, exactly what we need. */
5282         for (;;) {
5283                 save_len = iter->seq.seq.len;
5284                 ret = print_trace_line(iter);
5285
5286                 if (trace_seq_has_overflowed(&iter->seq)) {
5287                         iter->seq.seq.len = save_len;
5288                         break;
5289                 }
5290
5291                 /*
5292                  * This should not be hit, because a partial line should
5293                  * only be returned when iter->seq overflowed, and that was
5294                  * handled above. But check it anyway to be safe.
5295                  */
5296                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5297                         iter->seq.seq.len = save_len;
5298                         break;
5299                 }
5300
5301                 count = trace_seq_used(&iter->seq) - save_len;
5302                 if (rem < count) {
5303                         rem = 0;
5304                         iter->seq.seq.len = save_len;
5305                         break;
5306                 }
5307
5308                 if (ret != TRACE_TYPE_NO_CONSUME)
5309                         trace_consume(iter);
5310                 rem -= count;
5311                 if (!trace_find_next_entry_inc(iter))   {
5312                         rem = 0;
5313                         iter->ent = NULL;
5314                         break;
5315                 }
5316         }
5317
5318         return rem;
5319 }
5320
5321 static ssize_t tracing_splice_read_pipe(struct file *filp,
5322                                         loff_t *ppos,
5323                                         struct pipe_inode_info *pipe,
5324                                         size_t len,
5325                                         unsigned int flags)
5326 {
5327         struct page *pages_def[PIPE_DEF_BUFFERS];
5328         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5329         struct trace_iterator *iter = filp->private_data;
5330         struct splice_pipe_desc spd = {
5331                 .pages          = pages_def,
5332                 .partial        = partial_def,
5333                 .nr_pages       = 0, /* This gets updated below. */
5334                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5335                 .flags          = flags,
5336                 .ops            = &tracing_pipe_buf_ops,
5337                 .spd_release    = tracing_spd_release_pipe,
5338         };
5339         ssize_t ret;
5340         size_t rem;
5341         unsigned int i;
5342
5343         if (splice_grow_spd(pipe, &spd))
5344                 return -ENOMEM;
5345
5346         mutex_lock(&iter->mutex);
5347
5348         if (iter->trace->splice_read) {
5349                 ret = iter->trace->splice_read(iter, filp,
5350                                                ppos, pipe, len, flags);
5351                 if (ret)
5352                         goto out_err;
5353         }
5354
5355         ret = tracing_wait_pipe(filp);
5356         if (ret <= 0)
5357                 goto out_err;
5358
5359         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5360                 ret = -EFAULT;
5361                 goto out_err;
5362         }
5363
5364         trace_event_read_lock();
5365         trace_access_lock(iter->cpu_file);
5366
5367         /* Fill as many pages as possible. */
5368         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5369                 spd.pages[i] = alloc_page(GFP_KERNEL);
5370                 if (!spd.pages[i])
5371                         break;
5372
5373                 rem = tracing_fill_pipe_page(rem, iter);
5374
5375                 /* Copy the data into the page, so we can start over. */
5376                 ret = trace_seq_to_buffer(&iter->seq,
5377                                           page_address(spd.pages[i]),
5378                                           trace_seq_used(&iter->seq));
5379                 if (ret < 0) {
5380                         __free_page(spd.pages[i]);
5381                         break;
5382                 }
5383                 spd.partial[i].offset = 0;
5384                 spd.partial[i].len = trace_seq_used(&iter->seq);
5385
5386                 trace_seq_init(&iter->seq);
5387         }
5388
5389         trace_access_unlock(iter->cpu_file);
5390         trace_event_read_unlock();
5391         mutex_unlock(&iter->mutex);
5392
5393         spd.nr_pages = i;
5394
5395         if (i)
5396                 ret = splice_to_pipe(pipe, &spd);
5397         else
5398                 ret = 0;
5399 out:
5400         splice_shrink_spd(&spd);
5401         return ret;
5402
5403 out_err:
5404         mutex_unlock(&iter->mutex);
5405         goto out;
5406 }
5407
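/*
 * Read handler for the tracefs buffer_size_kb file: reports the ring
 * buffer size in KB for one CPU, or, for all CPUs, a single value when
 * every per-CPU buffer is the same size and "X" when they differ. While
 * the buffer has not been expanded yet, the size it will grow to is shown
 * as "(expanded: N)".
 */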
5408 static ssize_t
5409 tracing_entries_read(struct file *filp, char __user *ubuf,
5410                      size_t cnt, loff_t *ppos)
5411 {
5412         struct inode *inode = file_inode(filp);
5413         struct trace_array *tr = inode->i_private;
5414         int cpu = tracing_get_cpu(inode);
5415         char buf[64];
5416         int r = 0;
5417         ssize_t ret;
5418
5419         mutex_lock(&trace_types_lock);
5420
5421         if (cpu == RING_BUFFER_ALL_CPUS) {
5422                 int cpu, buf_size_same;
5423                 unsigned long size;
5424
5425                 size = 0;
5426                 buf_size_same = 1;
5427                 /* check if all CPU buffer sizes are the same */
5428                 for_each_tracing_cpu(cpu) {
5429                         /* fill in the size from the first enabled CPU */
5430                         if (size == 0)
5431                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5432                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5433                                 buf_size_same = 0;
5434                                 break;
5435                         }
5436                 }
5437
5438                 if (buf_size_same) {
5439                         if (!ring_buffer_expanded)
5440                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5441                                             size >> 10,
5442                                             trace_buf_size >> 10);
5443                         else
5444                                 r = sprintf(buf, "%lu\n", size >> 10);
5445                 } else
5446                         r = sprintf(buf, "X\n");
5447         } else
5448                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5449
5450         mutex_unlock(&trace_types_lock);
5451
5452         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5453         return ret;
5454 }
5455
5456 static ssize_t
5457 tracing_entries_write(struct file *filp, const char __user *ubuf,
5458                       size_t cnt, loff_t *ppos)
5459 {
5460         struct inode *inode = file_inode(filp);
5461         struct trace_array *tr = inode->i_private;
5462         unsigned long val;
5463         int ret;
5464
5465         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5466         if (ret)
5467                 return ret;
5468
5469         /* must have at least 1 entry */
5470         if (!val)
5471                 return -EINVAL;
5472
5473         /* value is in KB */
5474         val <<= 10;
5475         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5476         if (ret < 0)
5477                 return ret;
5478
5479         *ppos += cnt;
5480
5481         return cnt;
5482 }
5483
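/*
 * Read handler for the buffer_total_size_kb file: sums the per-CPU buffer
 * sizes (in KB) across all tracing CPUs, again noting the expanded size
 * when the ring buffer has not been expanded yet.
 */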
5484 static ssize_t
5485 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5486                                 size_t cnt, loff_t *ppos)
5487 {
5488         struct trace_array *tr = filp->private_data;
5489         char buf[64];
5490         int r, cpu;
5491         unsigned long size = 0, expanded_size = 0;
5492
5493         mutex_lock(&trace_types_lock);
5494         for_each_tracing_cpu(cpu) {
5495                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5496                 if (!ring_buffer_expanded)
5497                         expanded_size += trace_buf_size >> 10;
5498         }
5499         if (ring_buffer_expanded)
5500                 r = sprintf(buf, "%lu\n", size);
5501         else
5502                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5503         mutex_unlock(&trace_types_lock);
5504
5505         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5506 }
5507
5508 static ssize_t
5509 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5510                           size_t cnt, loff_t *ppos)
5511 {
5512         /*
5513          * There is no need to read what the user has written; this function
5514          * only exists so that "echo" does not report an error when used.
5515          */
5516
5517         *ppos += cnt;
5518
5519         return cnt;
5520 }
5521
5522 static int
5523 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5524 {
5525         struct trace_array *tr = inode->i_private;
5526
5527         /* disable tracing? */
5528         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5529                 tracer_tracing_off(tr);
5530         /* resize the ring buffer to 0 */
5531         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5532
5533         trace_array_put(tr);
5534
5535         return 0;
5536 }
5537
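/*
 * Write handler for the trace_marker file: text written here by userspace
 * is injected into the ring buffer as a TRACE_PRINT event, e.g.
 *
 *      echo "hello from userspace" > trace_marker
 *
 * The data is copied straight out of the pinned user pages; see the
 * comment inside the function.
 */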
5538 static ssize_t
5539 tracing_mark_write(struct file *filp, const char __user *ubuf,
5540                                         size_t cnt, loff_t *fpos)
5541 {
5542         unsigned long addr = (unsigned long)ubuf;
5543         struct trace_array *tr = filp->private_data;
5544         struct ring_buffer_event *event;
5545         struct ring_buffer *buffer;
5546         struct print_entry *entry;
5547         unsigned long irq_flags;
5548         struct page *pages[2];
5549         void *map_page[2];
5550         int nr_pages = 1;
5551         ssize_t written;
5552         int offset;
5553         int size;
5554         int len;
5555         int ret;
5556         int i;
5557
5558         if (tracing_disabled)
5559                 return -EINVAL;
5560
5561         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5562                 return -EINVAL;
5563
5564         if (cnt > TRACE_BUF_SIZE)
5565                 cnt = TRACE_BUF_SIZE;
5566
5567         /*
5568          * Userspace is injecting traces into the kernel trace buffer.
5569          * We want to be as non-intrusive as possible.
5570          * To do so, we do not want to allocate any special buffers
5571          * or take any locks, but instead write the userspace data
5572          * straight into the ring buffer.
5573          *
5574          * First we need to pin the userspace buffer into memory, which it
5575          * most likely already is because userspace just referenced it, but
5576          * there is no guarantee of that. By using get_user_pages_fast()
5577          * and kmap_atomic/kunmap_atomic() we can get access to the
5578          * pages directly. We then write the data directly into the
5579          * ring buffer.
5580          */
5581         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5582
5583         /* check if we cross pages */
5584         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5585                 nr_pages = 2;
5586
5587         offset = addr & (PAGE_SIZE - 1);
5588         addr &= PAGE_MASK;
5589
5590         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5591         if (ret < nr_pages) {
5592                 while (--ret >= 0)
5593                         put_page(pages[ret]);
5594                 written = -EFAULT;
5595                 goto out;
5596         }
5597
5598         for (i = 0; i < nr_pages; i++)
5599                 map_page[i] = kmap_atomic(pages[i]);
5600
5601         local_save_flags(irq_flags);
5602         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5603         buffer = tr->trace_buffer.buffer;
5604         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5605                                           irq_flags, preempt_count());
5606         if (!event) {
5607                 /* Ring buffer disabled, return as if not open for write */
5608                 written = -EBADF;
5609                 goto out_unlock;
5610         }
5611
5612         entry = ring_buffer_event_data(event);
5613         entry->ip = _THIS_IP_;
5614
5615         if (nr_pages == 2) {
5616                 len = PAGE_SIZE - offset;
5617                 memcpy(&entry->buf, map_page[0] + offset, len);
5618                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5619         } else
5620                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5621
5622         if (entry->buf[cnt - 1] != '\n') {
5623                 entry->buf[cnt] = '\n';
5624                 entry->buf[cnt + 1] = '\0';
5625         } else
5626                 entry->buf[cnt] = '\0';
5627
5628         __buffer_unlock_commit(buffer, event);
5629
5630         written = cnt;
5631
5632         *fpos += written;
5633
5634  out_unlock:
5635         for (i = nr_pages - 1; i >= 0; i--) {
5636                 kunmap_atomic(map_page[i]);
5637                 put_page(pages[i]);
5638         }
5639  out:
5640         return written;
5641 }
5642
5643 static int tracing_clock_show(struct seq_file *m, void *v)
5644 {
5645         struct trace_array *tr = m->private;
5646         int i;
5647
5648         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5649                 seq_printf(m,
5650                         "%s%s%s%s", i ? " " : "",
5651                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5652                         i == tr->clock_id ? "]" : "");
5653         seq_putc(m, '\n');
5654
5655         return 0;
5656 }
5657
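/*
 * Select the trace clock named @clockstr from trace_clocks[] and apply it
 * to the main buffer (and the max buffer, when it exists). Both buffers
 * are reset afterwards because timestamps taken with different clocks are
 * not comparable.
 */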
5658 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5659 {
5660         int i;
5661
5662         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5663                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5664                         break;
5665         }
5666         if (i == ARRAY_SIZE(trace_clocks))
5667                 return -EINVAL;
5668
5669         mutex_lock(&trace_types_lock);
5670
5671         tr->clock_id = i;
5672
5673         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5674
5675         /*
5676          * New clock may not be consistent with the previous clock.
5677          * Reset the buffer so that it doesn't have incomparable timestamps.
5678          */
5679         tracing_reset_online_cpus(&tr->trace_buffer);
5680
5681 #ifdef CONFIG_TRACER_MAX_TRACE
5682         if (tr->max_buffer.buffer)
5683                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5684         tracing_reset_online_cpus(&tr->max_buffer);
5685 #endif
5686
5687         mutex_unlock(&trace_types_lock);
5688
5689         return 0;
5690 }
5691
5692 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5693                                    size_t cnt, loff_t *fpos)
5694 {
5695         struct seq_file *m = filp->private_data;
5696         struct trace_array *tr = m->private;
5697         char buf[64];
5698         const char *clockstr;
5699         int ret;
5700
5701         if (cnt >= sizeof(buf))
5702                 return -EINVAL;
5703
5704         if (copy_from_user(buf, ubuf, cnt))
5705                 return -EFAULT;
5706
5707         buf[cnt] = 0;
5708
5709         clockstr = strstrip(buf);
5710
5711         ret = tracing_set_clock(tr, clockstr);
5712         if (ret)
5713                 return ret;
5714
5715         *fpos += cnt;
5716
5717         return cnt;
5718 }
5719
5720 static int tracing_clock_open(struct inode *inode, struct file *file)
5721 {
5722         struct trace_array *tr = inode->i_private;
5723         int ret;
5724
5725         if (tracing_disabled)
5726                 return -ENODEV;
5727
5728         if (trace_array_get(tr))
5729                 return -ENODEV;
5730
5731         ret = single_open(file, tracing_clock_show, inode->i_private);
5732         if (ret < 0)
5733                 trace_array_put(tr);
5734
5735         return ret;
5736 }
5737
5738 struct ftrace_buffer_info {
5739         struct trace_iterator   iter;
5740         void                    *spare;
5741         unsigned int            read;
5742 };
5743
5744 #ifdef CONFIG_TRACER_SNAPSHOT
5745 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5746 {
5747         struct trace_array *tr = inode->i_private;
5748         struct trace_iterator *iter;
5749         struct seq_file *m;
5750         int ret = 0;
5751
5752         if (trace_array_get(tr) < 0)
5753                 return -ENODEV;
5754
5755         if (file->f_mode & FMODE_READ) {
5756                 iter = __tracing_open(inode, file, true);
5757                 if (IS_ERR(iter))
5758                         ret = PTR_ERR(iter);
5759         } else {
5760                 /* Writes still need the seq_file to hold the private data */
5761                 ret = -ENOMEM;
5762                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5763                 if (!m)
5764                         goto out;
5765                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5766                 if (!iter) {
5767                         kfree(m);
5768                         goto out;
5769                 }
5770                 ret = 0;
5771
5772                 iter->tr = tr;
5773                 iter->trace_buffer = &tr->max_buffer;
5774                 iter->cpu_file = tracing_get_cpu(inode);
5775                 m->private = iter;
5776                 file->private_data = m;
5777         }
5778 out:
5779         if (ret < 0)
5780                 trace_array_put(tr);
5781
5782         return ret;
5783 }
5784
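/*
 * Write handler for the snapshot file. The written value selects the
 * action, matching the switch below:
 *   0     - free the snapshot buffer (only valid for all CPUs)
 *   1     - allocate the snapshot buffer if needed and swap it with the
 *           live buffer (per-CPU swap only if the ring buffer allows it)
 *   other - clear the snapshot buffer contents without freeing it
 */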
5785 static ssize_t
5786 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5787                        loff_t *ppos)
5788 {
5789         struct seq_file *m = filp->private_data;
5790         struct trace_iterator *iter = m->private;
5791         struct trace_array *tr = iter->tr;
5792         unsigned long val;
5793         int ret;
5794
5795         ret = tracing_update_buffers();
5796         if (ret < 0)
5797                 return ret;
5798
5799         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5800         if (ret)
5801                 return ret;
5802
5803         mutex_lock(&trace_types_lock);
5804
5805         if (tr->current_trace->use_max_tr) {
5806                 ret = -EBUSY;
5807                 goto out;
5808         }
5809
5810         switch (val) {
5811         case 0:
5812                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5813                         ret = -EINVAL;
5814                         break;
5815                 }
5816                 if (tr->allocated_snapshot)
5817                         free_snapshot(tr);
5818                 break;
5819         case 1:
5820 /* Only allow per-cpu swap if the ring buffer supports it */
5821 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5822                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5823                         ret = -EINVAL;
5824                         break;
5825                 }
5826 #endif
5827                 if (!tr->allocated_snapshot)
5828                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5829                                 &tr->trace_buffer, iter->cpu_file);
5830                 else
5831                         ret = alloc_snapshot(tr);
5832
5833                 if (ret < 0)
5834                         break;
5835
5836                 local_irq_disable();
5837                 /* Now, we're going to swap */
5838                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5839                         update_max_tr(tr, current, smp_processor_id());
5840                 else
5841                         update_max_tr_single(tr, current, iter->cpu_file);
5842                 local_irq_enable();
5843                 break;
5844         default:
5845                 if (tr->allocated_snapshot) {
5846                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5847                                 tracing_reset_online_cpus(&tr->max_buffer);
5848                         else
5849                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5850                 }
5851                 break;
5852         }
5853
5854         if (ret >= 0) {
5855                 *ppos += cnt;
5856                 ret = cnt;
5857         }
5858 out:
5859         mutex_unlock(&trace_types_lock);
5860         return ret;
5861 }
5862
5863 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5864 {
5865         struct seq_file *m = file->private_data;
5866         int ret;
5867
5868         ret = tracing_release(inode, file);
5869
5870         if (file->f_mode & FMODE_READ)
5871                 return ret;
5872
5873         /* If write only, the seq_file is just a stub */
5874         if (m)
5875                 kfree(m->private);
5876         kfree(m);
5877
5878         return 0;
5879 }
5880
5881 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5882 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5883                                     size_t count, loff_t *ppos);
5884 static int tracing_buffers_release(struct inode *inode, struct file *file);
5885 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5886                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5887
5888 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5889 {
5890         struct ftrace_buffer_info *info;
5891         int ret;
5892
5893         ret = tracing_buffers_open(inode, filp);
5894         if (ret < 0)
5895                 return ret;
5896
5897         info = filp->private_data;
5898
5899         if (info->iter.trace->use_max_tr) {
5900                 tracing_buffers_release(inode, filp);
5901                 return -EBUSY;
5902         }
5903
5904         info->iter.snapshot = true;
5905         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5906
5907         return ret;
5908 }
5909
5910 #endif /* CONFIG_TRACER_SNAPSHOT */
5911
5912
5913 static const struct file_operations tracing_thresh_fops = {
5914         .open           = tracing_open_generic,
5915         .read           = tracing_thresh_read,
5916         .write          = tracing_thresh_write,
5917         .llseek         = generic_file_llseek,
5918 };
5919
5920 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5921 static const struct file_operations tracing_max_lat_fops = {
5922         .open           = tracing_open_generic,
5923         .read           = tracing_max_lat_read,
5924         .write          = tracing_max_lat_write,
5925         .llseek         = generic_file_llseek,
5926 };
5927 #endif
5928
5929 static const struct file_operations set_tracer_fops = {
5930         .open           = tracing_open_generic,
5931         .read           = tracing_set_trace_read,
5932         .write          = tracing_set_trace_write,
5933         .llseek         = generic_file_llseek,
5934 };
5935
5936 static const struct file_operations tracing_pipe_fops = {
5937         .open           = tracing_open_pipe,
5938         .poll           = tracing_poll_pipe,
5939         .read           = tracing_read_pipe,
5940         .splice_read    = tracing_splice_read_pipe,
5941         .release        = tracing_release_pipe,
5942         .llseek         = no_llseek,
5943 };
5944
5945 static const struct file_operations tracing_entries_fops = {
5946         .open           = tracing_open_generic_tr,
5947         .read           = tracing_entries_read,
5948         .write          = tracing_entries_write,
5949         .llseek         = generic_file_llseek,
5950         .release        = tracing_release_generic_tr,
5951 };
5952
5953 static const struct file_operations tracing_total_entries_fops = {
5954         .open           = tracing_open_generic_tr,
5955         .read           = tracing_total_entries_read,
5956         .llseek         = generic_file_llseek,
5957         .release        = tracing_release_generic_tr,
5958 };
5959
5960 static const struct file_operations tracing_free_buffer_fops = {
5961         .open           = tracing_open_generic_tr,
5962         .write          = tracing_free_buffer_write,
5963         .release        = tracing_free_buffer_release,
5964 };
5965
5966 static const struct file_operations tracing_mark_fops = {
5967         .open           = tracing_open_generic_tr,
5968         .write          = tracing_mark_write,
5969         .llseek         = generic_file_llseek,
5970         .release        = tracing_release_generic_tr,
5971 };
5972
5973 static const struct file_operations trace_clock_fops = {
5974         .open           = tracing_clock_open,
5975         .read           = seq_read,
5976         .llseek         = seq_lseek,
5977         .release        = tracing_single_release_tr,
5978         .write          = tracing_clock_write,
5979 };
5980
5981 #ifdef CONFIG_TRACER_SNAPSHOT
5982 static const struct file_operations snapshot_fops = {
5983         .open           = tracing_snapshot_open,
5984         .read           = seq_read,
5985         .write          = tracing_snapshot_write,
5986         .llseek         = tracing_lseek,
5987         .release        = tracing_snapshot_release,
5988 };
5989
5990 static const struct file_operations snapshot_raw_fops = {
5991         .open           = snapshot_raw_open,
5992         .read           = tracing_buffers_read,
5993         .release        = tracing_buffers_release,
5994         .splice_read    = tracing_buffers_splice_read,
5995         .llseek         = no_llseek,
5996 };
5997
5998 #endif /* CONFIG_TRACER_SNAPSHOT */
5999
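/*
 * Open handler for the per-CPU raw buffer readers (trace_pipe_raw): sets
 * up an ftrace_buffer_info with its own iterator, pins the trace_array
 * and takes a reference on the current tracer, mirroring
 * tracing_open_pipe() above.
 */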
6000 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6001 {
6002         struct trace_array *tr = inode->i_private;
6003         struct ftrace_buffer_info *info;
6004         int ret;
6005
6006         if (tracing_disabled)
6007                 return -ENODEV;
6008
6009         if (trace_array_get(tr) < 0)
6010                 return -ENODEV;
6011
6012         info = kzalloc(sizeof(*info), GFP_KERNEL);
6013         if (!info) {
6014                 trace_array_put(tr);
6015                 return -ENOMEM;
6016         }
6017
6018         mutex_lock(&trace_types_lock);
6019
6020         info->iter.tr           = tr;
6021         info->iter.cpu_file     = tracing_get_cpu(inode);
6022         info->iter.trace        = tr->current_trace;
6023         info->iter.trace_buffer = &tr->trace_buffer;
6024         info->spare             = NULL;
6025         /* Force reading ring buffer for first read */
6026         info->read              = (unsigned int)-1;
6027
6028         filp->private_data = info;
6029
6030         tr->current_trace->ref++;
6031
6032         mutex_unlock(&trace_types_lock);
6033
6034         ret = nonseekable_open(inode, filp);
6035         if (ret < 0)
6036                 trace_array_put(tr);
6037
6038         return ret;
6039 }
6040
6041 static unsigned int
6042 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6043 {
6044         struct ftrace_buffer_info *info = filp->private_data;
6045         struct trace_iterator *iter = &info->iter;
6046
6047         return trace_poll(iter, filp, poll_table);
6048 }
6049
6050 static ssize_t
6051 tracing_buffers_read(struct file *filp, char __user *ubuf,
6052                      size_t count, loff_t *ppos)
6053 {
6054         struct ftrace_buffer_info *info = filp->private_data;
6055         struct trace_iterator *iter = &info->iter;
6056         ssize_t ret;
6057         ssize_t size;
6058
6059         if (!count)
6060                 return 0;
6061
6062 #ifdef CONFIG_TRACER_MAX_TRACE
6063         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6064                 return -EBUSY;
6065 #endif
6066
6067         if (!info->spare)
6068                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6069                                                           iter->cpu_file);
6070         if (!info->spare)
6071                 return -ENOMEM;
6072
6073         /* Do we have previous read data to read? */
6074         if (info->read < PAGE_SIZE)
6075                 goto read;
6076
6077  again:
6078         trace_access_lock(iter->cpu_file);
6079         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6080                                     &info->spare,
6081                                     count,
6082                                     iter->cpu_file, 0);
6083         trace_access_unlock(iter->cpu_file);
6084
6085         if (ret < 0) {
6086                 if (trace_empty(iter)) {
6087                         if ((filp->f_flags & O_NONBLOCK))
6088                                 return -EAGAIN;
6089
6090                         ret = wait_on_pipe(iter, false);
6091                         if (ret)
6092                                 return ret;
6093
6094                         goto again;
6095                 }
6096                 return 0;
6097         }
6098
6099         info->read = 0;
6100  read:
6101         size = PAGE_SIZE - info->read;
6102         if (size > count)
6103                 size = count;
6104
6105         ret = copy_to_user(ubuf, info->spare + info->read, size);
6106         if (ret == size)
6107                 return -EFAULT;
6108
6109         size -= ret;
6110
6111         *ppos += size;
6112         info->read += size;
6113
6114         return size;
6115 }
6116
6117 static int tracing_buffers_release(struct inode *inode, struct file *file)
6118 {
6119         struct ftrace_buffer_info *info = file->private_data;
6120         struct trace_iterator *iter = &info->iter;
6121
6122         mutex_lock(&trace_types_lock);
6123
6124         iter->tr->current_trace->ref--;
6125
6126         __trace_array_put(iter->tr);
6127
6128         if (info->spare)
6129                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6130         kfree(info);
6131
6132         mutex_unlock(&trace_types_lock);
6133
6134         return 0;
6135 }
6136
6137 struct buffer_ref {
6138         struct ring_buffer      *buffer;
6139         void                    *page;
6140         int                     ref;
6141 };
6142
6143 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6144                                     struct pipe_buffer *buf)
6145 {
6146         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6147
6148         if (--ref->ref)
6149                 return;
6150
6151         ring_buffer_free_read_page(ref->buffer, ref->page);
6152         kfree(ref);
6153         buf->private = 0;
6154 }
6155
6156 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6157                                 struct pipe_buffer *buf)
6158 {
6159         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6160
6161         if (ref->ref > INT_MAX/2)
6162                 return false;
6163
6164         ref->ref++;
6165         return true;
6166 }
6167
6168 /* Pipe buffer operations for a buffer. */
6169 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6170         .can_merge              = 0,
6171         .confirm                = generic_pipe_buf_confirm,
6172         .release                = buffer_pipe_buf_release,
6173         .steal                  = generic_pipe_buf_steal,
6174         .get                    = buffer_pipe_buf_get,
6175 };
6176
6177 /*
6178  * Callback from splice_to_pipe(): releases pages left at the end of the
6179  * spd in case we errored out while filling the pipe.
6180  */
6181 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6182 {
6183         struct buffer_ref *ref =
6184                 (struct buffer_ref *)spd->partial[i].private;
6185
6186         if (--ref->ref)
6187                 return;
6188
6189         ring_buffer_free_read_page(ref->buffer, ref->page);
6190         kfree(ref);
6191         spd->partial[i].private = 0;
6192 }
6193
6194 static ssize_t
6195 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6196                             struct pipe_inode_info *pipe, size_t len,
6197                             unsigned int flags)
6198 {
6199         struct ftrace_buffer_info *info = file->private_data;
6200         struct trace_iterator *iter = &info->iter;
6201         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6202         struct page *pages_def[PIPE_DEF_BUFFERS];
6203         struct splice_pipe_desc spd = {
6204                 .pages          = pages_def,
6205                 .partial        = partial_def,
6206                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6207                 .flags          = flags,
6208                 .ops            = &buffer_pipe_buf_ops,
6209                 .spd_release    = buffer_spd_release,
6210         };
6211         struct buffer_ref *ref;
6212         int entries, i;
6213         ssize_t ret = 0;
6214
6215 #ifdef CONFIG_TRACER_MAX_TRACE
6216         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6217                 return -EBUSY;
6218 #endif
6219
6220         if (*ppos & (PAGE_SIZE - 1))
6221                 return -EINVAL;
6222
6223         if (len & (PAGE_SIZE - 1)) {
6224                 if (len < PAGE_SIZE)
6225                         return -EINVAL;
6226                 len &= PAGE_MASK;
6227         }
6228
6229         if (splice_grow_spd(pipe, &spd))
6230                 return -ENOMEM;
6231
6232  again:
6233         trace_access_lock(iter->cpu_file);
6234         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6235
6236         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6237                 struct page *page;
6238                 int r;
6239
6240                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6241                 if (!ref) {
6242                         ret = -ENOMEM;
6243                         break;
6244                 }
6245
6246                 ref->ref = 1;
6247                 ref->buffer = iter->trace_buffer->buffer;
6248                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6249                 if (!ref->page) {
6250                         ret = -ENOMEM;
6251                         kfree(ref);
6252                         break;
6253                 }
6254
6255                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6256                                           len, iter->cpu_file, 1);
6257                 if (r < 0) {
6258                         ring_buffer_free_read_page(ref->buffer, ref->page);
6259                         kfree(ref);
6260                         break;
6261                 }
6262
6263                 page = virt_to_page(ref->page);
6264
6265                 spd.pages[i] = page;
6266                 spd.partial[i].len = PAGE_SIZE;
6267                 spd.partial[i].offset = 0;
6268                 spd.partial[i].private = (unsigned long)ref;
6269                 spd.nr_pages++;
6270                 *ppos += PAGE_SIZE;
6271
6272                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6273         }
6274
6275         trace_access_unlock(iter->cpu_file);
6276         spd.nr_pages = i;
6277
6278         /* did we read anything? */
6279         if (!spd.nr_pages) {
6280                 if (ret)
6281                         goto out;
6282
6283                 ret = -EAGAIN;
6284                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6285                         goto out;
6286
6287                 ret = wait_on_pipe(iter, true);
6288                 if (ret)
6289                         goto out;
6290
6291                 goto again;
6292         }
6293
6294         ret = splice_to_pipe(pipe, &spd);
6295 out:
6296         splice_shrink_spd(&spd);
6297
6298         return ret;
6299 }
6300
6301 static const struct file_operations tracing_buffers_fops = {
6302         .open           = tracing_buffers_open,
6303         .read           = tracing_buffers_read,
6304         .poll           = tracing_buffers_poll,
6305         .release        = tracing_buffers_release,
6306         .splice_read    = tracing_buffers_splice_read,
6307         .llseek         = no_llseek,
6308 };
6309
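/*
 * Read handler for the per-CPU "stats" file: dumps entry, overrun and
 * byte counters for one CPU's ring buffer, plus the oldest event
 * timestamp and the current timestamp (in seconds when the trace clock
 * is in nanoseconds, raw otherwise).
 */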
6310 static ssize_t
6311 tracing_stats_read(struct file *filp, char __user *ubuf,
6312                    size_t count, loff_t *ppos)
6313 {
6314         struct inode *inode = file_inode(filp);
6315         struct trace_array *tr = inode->i_private;
6316         struct trace_buffer *trace_buf = &tr->trace_buffer;
6317         int cpu = tracing_get_cpu(inode);
6318         struct trace_seq *s;
6319         unsigned long cnt;
6320         unsigned long long t;
6321         unsigned long usec_rem;
6322
6323         s = kmalloc(sizeof(*s), GFP_KERNEL);
6324         if (!s)
6325                 return -ENOMEM;
6326
6327         trace_seq_init(s);
6328
6329         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6330         trace_seq_printf(s, "entries: %ld\n", cnt);
6331
6332         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6333         trace_seq_printf(s, "overrun: %ld\n", cnt);
6334
6335         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6336         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6337
6338         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6339         trace_seq_printf(s, "bytes: %ld\n", cnt);
6340
6341         if (trace_clocks[tr->clock_id].in_ns) {
6342                 /* local or global for trace_clock */
6343                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6344                 usec_rem = do_div(t, USEC_PER_SEC);
6345                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6346                                                                 t, usec_rem);
6347
6348                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6349                 usec_rem = do_div(t, USEC_PER_SEC);
6350                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6351         } else {
6352                 /* counter or tsc mode for trace_clock */
6353                 trace_seq_printf(s, "oldest event ts: %llu\n",
6354                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6355
6356                 trace_seq_printf(s, "now ts: %llu\n",
6357                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6358         }
6359
6360         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6361         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6362
6363         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6364         trace_seq_printf(s, "read events: %ld\n", cnt);
6365
6366         count = simple_read_from_buffer(ubuf, count, ppos,
6367                                         s->buffer, trace_seq_used(s));
6368
6369         kfree(s);
6370
6371         return count;
6372 }
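/*
 * An illustrative sketch of what reading per_cpu/cpu<N>/stats produces,
 * assuming the trace_seq_printf() calls above all run; the <...> values
 * are placeholders, and the two "ts" lines only take the sec.usec form
 * when the selected trace clock counts in nanoseconds:
 *
 *      entries: <n>
 *      overrun: <n>
 *      commit overrun: <n>
 *      bytes: <n>
 *      oldest event ts: <sec>.<usec>
 *      now ts: <sec>.<usec>
 *      dropped events: <n>
 *      read events: <n>
 */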
6373
6374 static const struct file_operations tracing_stats_fops = {
6375         .open           = tracing_open_generic_tr,
6376         .read           = tracing_stats_read,
6377         .llseek         = generic_file_llseek,
6378         .release        = tracing_release_generic_tr,
6379 };
6380
6381 #ifdef CONFIG_DYNAMIC_FTRACE
6382
6383 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6384 {
6385         return 0;
6386 }
6387
6388 static ssize_t
6389 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6390                   size_t cnt, loff_t *ppos)
6391 {
6392         static char ftrace_dyn_info_buffer[1024];
6393         static DEFINE_MUTEX(dyn_info_mutex);
6394         unsigned long *p = filp->private_data;
6395         char *buf = ftrace_dyn_info_buffer;
6396         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6397         int r;
6398
6399         mutex_lock(&dyn_info_mutex);
6400         r = sprintf(buf, "%ld ", *p);
6401
6402         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6403         buf[r++] = '\n';
6404
6405         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6406
6407         mutex_unlock(&dyn_info_mutex);
6408
6409         return r;
6410 }
6411
6412 static const struct file_operations tracing_dyn_info_fops = {
6413         .open           = tracing_open_generic,
6414         .read           = tracing_read_dyn_info,
6415         .llseek         = generic_file_llseek,
6416 };
6417 #endif /* CONFIG_DYNAMIC_FTRACE */
6418
6419 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6420 static void
6421 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6422 {
6423         tracing_snapshot();
6424 }
6425
6426 static void
6427 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6428 {
6429         unsigned long *count = (unsigned long *)data;
6430
6431         if (!*count)
6432                 return;
6433
6434         if (*count != -1)
6435                 (*count)--;
6436
6437         tracing_snapshot();
6438 }
6439
6440 static int
6441 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6442                       struct ftrace_probe_ops *ops, void *data)
6443 {
6444         long count = (long)data;
6445
6446         seq_printf(m, "%ps:", (void *)ip);
6447
6448         seq_puts(m, "snapshot");
6449
6450         if (count == -1)
6451                 seq_puts(m, ":unlimited\n");
6452         else
6453                 seq_printf(m, ":count=%ld\n", count);
6454
6455         return 0;
6456 }
6457
6458 static struct ftrace_probe_ops snapshot_probe_ops = {
6459         .func                   = ftrace_snapshot,
6460         .print                  = ftrace_snapshot_print,
6461 };
6462
6463 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6464         .func                   = ftrace_count_snapshot,
6465         .print                  = ftrace_snapshot_print,
6466 };
6467
6468 static int
6469 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6470                                char *glob, char *cmd, char *param, int enable)
6471 {
6472         struct ftrace_probe_ops *ops;
6473         void *count = (void *)-1;
6474         char *number;
6475         int ret;
6476
6477         /* hash funcs only work with set_ftrace_filter */
6478         if (!enable)
6479                 return -EINVAL;
6480
6481         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6482
6483         if (glob[0] == '!') {
6484                 unregister_ftrace_function_probe_func(glob+1, ops);
6485                 return 0;
6486         }
6487
6488         if (!param)
6489                 goto out_reg;
6490
6491         number = strsep(&param, ":");
6492
6493         if (!strlen(number))
6494                 goto out_reg;
6495
6496         /*
6497          * We use the callback data field (which is a pointer)
6498          * as our counter.
6499          */
6500         ret = kstrtoul(number, 0, (unsigned long *)&count);
6501         if (ret)
6502                 return ret;
6503
6504  out_reg:
6505         ret = alloc_snapshot(&global_trace);
6506         if (ret < 0)
6507                 goto out;
6508
6509         ret = register_ftrace_function_probe(glob, ops, count);
6510
6511  out:
6512         return ret < 0 ? ret : 0;
6513 }
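/*
 * A minimal usage sketch for the callback above (the function name and
 * count below are placeholders, not taken from this file):
 *
 *      # echo 'schedule:snapshot:3' > set_ftrace_filter
 *
 * Here glob is "schedule", cmd is "snapshot" and param is "3"; kstrtoul()
 * stores the 3 directly in the pointer-sized "count", which
 * register_ftrace_function_probe() keeps as the probe data, letting
 * ftrace_count_snapshot() decrement it in place on each hit.  Prefixing
 * the glob with '!' (handled above) removes the probe again:
 *
 *      # echo '!schedule:snapshot' > set_ftrace_filter
 */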
6514
6515 static struct ftrace_func_command ftrace_snapshot_cmd = {
6516         .name                   = "snapshot",
6517         .func                   = ftrace_trace_snapshot_callback,
6518 };
6519
6520 static __init int register_snapshot_cmd(void)
6521 {
6522         return register_ftrace_command(&ftrace_snapshot_cmd);
6523 }
6524 #else
6525 static inline __init int register_snapshot_cmd(void) { return 0; }
6526 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6527
6528 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6529 {
6530         if (WARN_ON(!tr->dir))
6531                 return ERR_PTR(-ENODEV);
6532
6533         /* Top directory uses NULL as the parent */
6534         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6535                 return NULL;
6536
6537         /* All sub buffers have a descriptor */
6538         return tr->dir;
6539 }
6540
6541 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6542 {
6543         struct dentry *d_tracer;
6544
6545         if (tr->percpu_dir)
6546                 return tr->percpu_dir;
6547
6548         d_tracer = tracing_get_dentry(tr);
6549         if (IS_ERR(d_tracer))
6550                 return NULL;
6551
6552         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6553
6554         WARN_ONCE(!tr->percpu_dir,
6555                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6556
6557         return tr->percpu_dir;
6558 }
6559
6560 static struct dentry *
6561 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6562                       void *data, long cpu, const struct file_operations *fops)
6563 {
6564         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6565
6566         if (ret) /* See tracing_get_cpu() */
6567                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6568         return ret;
6569 }
6570
6571 static void
6572 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6573 {
6574         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6575         struct dentry *d_cpu;
6576         char cpu_dir[30]; /* 30 characters should be more than enough */
6577
6578         if (!d_percpu)
6579                 return;
6580
6581         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6582         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6583         if (!d_cpu) {
6584                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6585                 return;
6586         }
6587
6588         /* per cpu trace_pipe */
6589         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6590                                 tr, cpu, &tracing_pipe_fops);
6591
6592         /* per cpu trace */
6593         trace_create_cpu_file("trace", 0644, d_cpu,
6594                                 tr, cpu, &tracing_fops);
6595
6596         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6597                                 tr, cpu, &tracing_buffers_fops);
6598
6599         trace_create_cpu_file("stats", 0444, d_cpu,
6600                                 tr, cpu, &tracing_stats_fops);
6601
6602         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6603                                 tr, cpu, &tracing_entries_fops);
6604
6605 #ifdef CONFIG_TRACER_SNAPSHOT
6606         trace_create_cpu_file("snapshot", 0644, d_cpu,
6607                                 tr, cpu, &snapshot_fops);
6608
6609         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6610                                 tr, cpu, &snapshot_raw_fops);
6611 #endif
6612 }
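/*
 * A sketch of the per-CPU layout the function above creates when all the
 * calls succeed (the snapshot files only exist with CONFIG_TRACER_SNAPSHOT):
 *
 *      per_cpu/cpu<N>/trace_pipe
 *      per_cpu/cpu<N>/trace
 *      per_cpu/cpu<N>/trace_pipe_raw
 *      per_cpu/cpu<N>/stats
 *      per_cpu/cpu<N>/buffer_size_kb
 *      per_cpu/cpu<N>/snapshot
 *      per_cpu/cpu<N>/snapshot_raw
 */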
6613
6614 #ifdef CONFIG_FTRACE_SELFTEST
6615 /* Let selftest have access to static functions in this file */
6616 #include "trace_selftest.c"
6617 #endif
6618
6619 static ssize_t
6620 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6621                         loff_t *ppos)
6622 {
6623         struct trace_option_dentry *topt = filp->private_data;
6624         char *buf;
6625
6626         if (topt->flags->val & topt->opt->bit)
6627                 buf = "1\n";
6628         else
6629                 buf = "0\n";
6630
6631         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6632 }
6633
6634 static ssize_t
6635 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6636                          loff_t *ppos)
6637 {
6638         struct trace_option_dentry *topt = filp->private_data;
6639         unsigned long val;
6640         int ret;
6641
6642         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6643         if (ret)
6644                 return ret;
6645
6646         if (val != 0 && val != 1)
6647                 return -EINVAL;
6648
6649         if (!!(topt->flags->val & topt->opt->bit) != val) {
6650                 mutex_lock(&trace_types_lock);
6651                 ret = __set_tracer_option(topt->tr, topt->flags,
6652                                           topt->opt, !val);
6653                 mutex_unlock(&trace_types_lock);
6654                 if (ret)
6655                         return ret;
6656         }
6657
6658         *ppos += cnt;
6659
6660         return cnt;
6661 }
6662
6663
6664 static const struct file_operations trace_options_fops = {
6665         .open = tracing_open_generic,
6666         .read = trace_options_read,
6667         .write = trace_options_write,
6668         .llseek = generic_file_llseek,
6669 };
6670
6671 /*
6672  * In order to pass in both the trace_array descriptor as well as the index
6673  * to the flag that the trace option file represents, the trace_array
6674  * has a character array of trace_flags_index[], which holds the index
6675  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6676  * The address of this character array is passed to the flag option file
6677  * read/write callbacks.
6678  *
6679  * In order to extract both the index and the trace_array descriptor,
6680  * get_tr_index() uses the following algorithm.
6681  *
6682  *   idx = *ptr;
6683  *
6684  * As the pointer points at one entry of that index array, dereferencing
6685  * it yields the bit index (remember, index[1] == 1).
6686  *
6687  * Then, to get the trace_array descriptor, we subtract that index
6688  * from the pointer, which takes us back to the start of the array:
6689  *
6690  *   ptr - idx == &index[0]
6691  *
6692  * Then a simple container_of() from that pointer gets us to the
6693  * trace_array descriptor.
6694  */
6695 static void get_tr_index(void *data, struct trace_array **ptr,
6696                          unsigned int *pindex)
6697 {
6698         *pindex = *(unsigned char *)data;
6699
6700         *ptr = container_of(data - *pindex, struct trace_array,
6701                             trace_flags_index);
6702 }
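/*
 * A worked example of the recovery above, using a hypothetical bit index
 * of 3: the option file's private data is &tr->trace_flags_index[3], so
 *
 *      idx = *data;       yields 3 (because index[3] == 3)
 *      data - idx;        is &tr->trace_flags_index[0]
 *
 * and container_of() on that address is the enclosing trace_array.
 */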
6703
6704 static ssize_t
6705 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6706                         loff_t *ppos)
6707 {
6708         void *tr_index = filp->private_data;
6709         struct trace_array *tr;
6710         unsigned int index;
6711         char *buf;
6712
6713         get_tr_index(tr_index, &tr, &index);
6714
6715         if (tr->trace_flags & (1 << index))
6716                 buf = "1\n";
6717         else
6718                 buf = "0\n";
6719
6720         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6721 }
6722
6723 static ssize_t
6724 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6725                          loff_t *ppos)
6726 {
6727         void *tr_index = filp->private_data;
6728         struct trace_array *tr;
6729         unsigned int index;
6730         unsigned long val;
6731         int ret;
6732
6733         get_tr_index(tr_index, &tr, &index);
6734
6735         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6736         if (ret)
6737                 return ret;
6738
6739         if (val != 0 && val != 1)
6740                 return -EINVAL;
6741
6742         mutex_lock(&trace_types_lock);
6743         ret = set_tracer_flag(tr, 1 << index, val);
6744         mutex_unlock(&trace_types_lock);
6745
6746         if (ret < 0)
6747                 return ret;
6748
6749         *ppos += cnt;
6750
6751         return cnt;
6752 }
6753
6754 static const struct file_operations trace_options_core_fops = {
6755         .open = tracing_open_generic,
6756         .read = trace_options_core_read,
6757         .write = trace_options_core_write,
6758         .llseek = generic_file_llseek,
6759 };
6760
6761 struct dentry *trace_create_file(const char *name,
6762                                  umode_t mode,
6763                                  struct dentry *parent,
6764                                  void *data,
6765                                  const struct file_operations *fops)
6766 {
6767         struct dentry *ret;
6768
6769         ret = tracefs_create_file(name, mode, parent, data, fops);
6770         if (!ret)
6771                 pr_warn("Could not create tracefs '%s' entry\n", name);
6772
6773         return ret;
6774 }
6775
6776
6777 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6778 {
6779         struct dentry *d_tracer;
6780
6781         if (tr->options)
6782                 return tr->options;
6783
6784         d_tracer = tracing_get_dentry(tr);
6785         if (IS_ERR(d_tracer))
6786                 return NULL;
6787
6788         tr->options = tracefs_create_dir("options", d_tracer);
6789         if (!tr->options) {
6790                 pr_warn("Could not create tracefs directory 'options'\n");
6791                 return NULL;
6792         }
6793
6794         return tr->options;
6795 }
6796
6797 static void
6798 create_trace_option_file(struct trace_array *tr,
6799                          struct trace_option_dentry *topt,
6800                          struct tracer_flags *flags,
6801                          struct tracer_opt *opt)
6802 {
6803         struct dentry *t_options;
6804
6805         t_options = trace_options_init_dentry(tr);
6806         if (!t_options)
6807                 return;
6808
6809         topt->flags = flags;
6810         topt->opt = opt;
6811         topt->tr = tr;
6812
6813         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6814                                     &trace_options_fops);
6815
6816 }
6817
6818 static void
6819 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6820 {
6821         struct trace_option_dentry *topts;
6822         struct trace_options *tr_topts;
6823         struct tracer_flags *flags;
6824         struct tracer_opt *opts;
6825         int cnt;
6826         int i;
6827
6828         if (!tracer)
6829                 return;
6830
6831         flags = tracer->flags;
6832
6833         if (!flags || !flags->opts)
6834                 return;
6835
6836         /*
6837          * If this is an instance, only create flags for tracers
6838          * the instance may have.
6839          */
6840         if (!trace_ok_for_array(tracer, tr))
6841                 return;
6842
6843         for (i = 0; i < tr->nr_topts; i++) {
6844                 /* Make sure there are no duplicate flags. */
6845                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6846                         return;
6847         }
6848
6849         opts = flags->opts;
6850
6851         for (cnt = 0; opts[cnt].name; cnt++)
6852                 ;
6853
6854         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6855         if (!topts)
6856                 return;
6857
6858         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6859                             GFP_KERNEL);
6860         if (!tr_topts) {
6861                 kfree(topts);
6862                 return;
6863         }
6864
6865         tr->topts = tr_topts;
6866         tr->topts[tr->nr_topts].tracer = tracer;
6867         tr->topts[tr->nr_topts].topts = topts;
6868         tr->nr_topts++;
6869
6870         for (cnt = 0; opts[cnt].name; cnt++) {
6871                 create_trace_option_file(tr, &topts[cnt], flags,
6872                                          &opts[cnt]);
6873                 WARN_ONCE(topts[cnt].entry == NULL,
6874                           "Failed to create trace option: %s",
6875                           opts[cnt].name);
6876         }
6877 }
6878
6879 static struct dentry *
6880 create_trace_option_core_file(struct trace_array *tr,
6881                               const char *option, long index)
6882 {
6883         struct dentry *t_options;
6884
6885         t_options = trace_options_init_dentry(tr);
6886         if (!t_options)
6887                 return NULL;
6888
6889         return trace_create_file(option, 0644, t_options,
6890                                  (void *)&tr->trace_flags_index[index],
6891                                  &trace_options_core_fops);
6892 }
6893
6894 static void create_trace_options_dir(struct trace_array *tr)
6895 {
6896         struct dentry *t_options;
6897         bool top_level = tr == &global_trace;
6898         int i;
6899
6900         t_options = trace_options_init_dentry(tr);
6901         if (!t_options)
6902                 return;
6903
6904         for (i = 0; trace_options[i]; i++) {
6905                 if (top_level ||
6906                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6907                         create_trace_option_core_file(tr, trace_options[i], i);
6908         }
6909 }
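/*
 * A brief usage sketch (the flag name is a placeholder): each file created
 * above reads back as "0\n" or "1\n", and a core trace flag can be toggled
 * with
 *
 *      # echo 1 > options/<flag-name>
 *
 * which lands in trace_options_core_write() and finally in
 * set_tracer_flag() for bit (1 << index).
 */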
6910
6911 static ssize_t
6912 rb_simple_read(struct file *filp, char __user *ubuf,
6913                size_t cnt, loff_t *ppos)
6914 {
6915         struct trace_array *tr = filp->private_data;
6916         char buf[64];
6917         int r;
6918
6919         r = tracer_tracing_is_on(tr);
6920         r = sprintf(buf, "%d\n", r);
6921
6922         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6923 }
6924
6925 static ssize_t
6926 rb_simple_write(struct file *filp, const char __user *ubuf,
6927                 size_t cnt, loff_t *ppos)
6928 {
6929         struct trace_array *tr = filp->private_data;
6930         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6931         unsigned long val;
6932         int ret;
6933
6934         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6935         if (ret)
6936                 return ret;
6937
6938         if (buffer) {
6939                 mutex_lock(&trace_types_lock);
6940                 if (!!val == tracer_tracing_is_on(tr)) {
6941                         val = 0; /* do nothing */
6942                 } else if (val) {
6943                         tracer_tracing_on(tr);
6944                         if (tr->current_trace->start)
6945                                 tr->current_trace->start(tr);
6946                 } else {
6947                         tracer_tracing_off(tr);
6948                         if (tr->current_trace->stop)
6949                                 tr->current_trace->stop(tr);
6950                 }
6951                 mutex_unlock(&trace_types_lock);
6952         }
6953
6954         (*ppos)++;
6955
6956         return cnt;
6957 }
6958
6959 static const struct file_operations rb_simple_fops = {
6960         .open           = tracing_open_generic_tr,
6961         .read           = rb_simple_read,
6962         .write          = rb_simple_write,
6963         .release        = tracing_release_generic_tr,
6964         .llseek         = default_llseek,
6965 };
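/*
 * rb_simple_fops backs the per-instance "tracing_on" file created by
 * init_tracer_tracefs() below; writing "1" turns the ring buffer (and the
 * current tracer's start hook) on, "0" turns it off:
 *
 *      # echo 0 > tracing_on
 *      # echo 1 > tracing_on
 */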
6966
6967 struct dentry *trace_instance_dir;
6968
6969 static void
6970 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6971
6972 static int
6973 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6974 {
6975         enum ring_buffer_flags rb_flags;
6976
6977         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6978
6979         buf->tr = tr;
6980
6981         buf->buffer = ring_buffer_alloc(size, rb_flags);
6982         if (!buf->buffer)
6983                 return -ENOMEM;
6984
6985         buf->data = alloc_percpu(struct trace_array_cpu);
6986         if (!buf->data) {
6987                 ring_buffer_free(buf->buffer);
6988                 buf->buffer = NULL;
6989                 return -ENOMEM;
6990         }
6991
6992         /* Allocate the first page for all buffers */
6993         set_buffer_entries(&tr->trace_buffer,
6994                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6995
6996         return 0;
6997 }
6998
6999 static int allocate_trace_buffers(struct trace_array *tr, int size)
7000 {
7001         int ret;
7002
7003         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7004         if (ret)
7005                 return ret;
7006
7007 #ifdef CONFIG_TRACER_MAX_TRACE
7008         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7009                                     allocate_snapshot ? size : 1);
7010         if (WARN_ON(ret)) {
7011                 ring_buffer_free(tr->trace_buffer.buffer);
7012                 tr->trace_buffer.buffer = NULL;
7013                 free_percpu(tr->trace_buffer.data);
7014                 tr->trace_buffer.data = NULL;
7015                 return -ENOMEM;
7016         }
7017         tr->allocated_snapshot = allocate_snapshot;
7018
7019         /*
7020          * Only the top level trace array gets its snapshot allocated
7021          * from the kernel command line.
7022          */
7023         allocate_snapshot = false;
7024 #endif
7025
7026         /*
7027          * Because of some magic with the way alloc_percpu() works on
7028          * x86_64, we need to synchronize the pgd of all the tables,
7029          * otherwise the trace events that happen in the x86_64 page fault
7030          * handlers can't cope with the fact that alloc_percpu()'d memory
7031          * might be touched from within a page fault trace event. We also
7032          * need to audit all other alloc_percpu() and vmalloc()
7033          * calls in tracing, because something might get triggered within a
7034          * page fault trace event!
7035          */
7036         vmalloc_sync_mappings();
7037
7038         return 0;
7039 }
7040
7041 static void free_trace_buffer(struct trace_buffer *buf)
7042 {
7043         if (buf->buffer) {
7044                 ring_buffer_free(buf->buffer);
7045                 buf->buffer = NULL;
7046                 free_percpu(buf->data);
7047                 buf->data = NULL;
7048         }
7049 }
7050
7051 static void free_trace_buffers(struct trace_array *tr)
7052 {
7053         if (!tr)
7054                 return;
7055
7056         free_trace_buffer(&tr->trace_buffer);
7057
7058 #ifdef CONFIG_TRACER_MAX_TRACE
7059         free_trace_buffer(&tr->max_buffer);
7060 #endif
7061 }
7062
7063 static void init_trace_flags_index(struct trace_array *tr)
7064 {
7065         int i;
7066
7067         /* Used by the trace options files */
7068         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7069                 tr->trace_flags_index[i] = i;
7070 }
7071
7072 static void __update_tracer_options(struct trace_array *tr)
7073 {
7074         struct tracer *t;
7075
7076         for (t = trace_types; t; t = t->next)
7077                 add_tracer_options(tr, t);
7078 }
7079
7080 static void update_tracer_options(struct trace_array *tr)
7081 {
7082         mutex_lock(&trace_types_lock);
7083         __update_tracer_options(tr);
7084         mutex_unlock(&trace_types_lock);
7085 }
7086
7087 static int instance_mkdir(const char *name)
7088 {
7089         struct trace_array *tr;
7090         int ret;
7091
7092         mutex_lock(&trace_types_lock);
7093
7094         ret = -EEXIST;
7095         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7096                 if (tr->name && strcmp(tr->name, name) == 0)
7097                         goto out_unlock;
7098         }
7099
7100         ret = -ENOMEM;
7101         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7102         if (!tr)
7103                 goto out_unlock;
7104
7105         tr->name = kstrdup(name, GFP_KERNEL);
7106         if (!tr->name)
7107                 goto out_free_tr;
7108
7109         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7110                 goto out_free_tr;
7111
7112         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7113
7114         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7115
7116         raw_spin_lock_init(&tr->start_lock);
7117
7118         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7119
7120         tr->current_trace = &nop_trace;
7121
7122         INIT_LIST_HEAD(&tr->systems);
7123         INIT_LIST_HEAD(&tr->events);
7124
7125         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7126                 goto out_free_tr;
7127
7128         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7129         if (!tr->dir)
7130                 goto out_free_tr;
7131
7132         ret = event_trace_add_tracer(tr->dir, tr);
7133         if (ret) {
7134                 tracefs_remove_recursive(tr->dir);
7135                 goto out_free_tr;
7136         }
7137
7138         init_tracer_tracefs(tr, tr->dir);
7139         init_trace_flags_index(tr);
7140         __update_tracer_options(tr);
7141
7142         list_add(&tr->list, &ftrace_trace_arrays);
7143
7144         mutex_unlock(&trace_types_lock);
7145
7146         return 0;
7147
7148  out_free_tr:
7149         free_trace_buffers(tr);
7150         free_cpumask_var(tr->tracing_cpumask);
7151         kfree(tr->name);
7152         kfree(tr);
7153
7154  out_unlock:
7155         mutex_unlock(&trace_types_lock);
7156
7157         return ret;
7158
7159 }
7160
7161 static int instance_rmdir(const char *name)
7162 {
7163         struct trace_array *tr;
7164         int found = 0;
7165         int ret;
7166         int i;
7167
7168         mutex_lock(&trace_types_lock);
7169
7170         ret = -ENODEV;
7171         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7172                 if (tr->name && strcmp(tr->name, name) == 0) {
7173                         found = 1;
7174                         break;
7175                 }
7176         }
7177         if (!found)
7178                 goto out_unlock;
7179
7180         ret = -EBUSY;
7181         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7182                 goto out_unlock;
7183
7184         list_del(&tr->list);
7185
7186         /* Disable all the flags that were enabled coming in */
7187         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7188                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7189                         set_tracer_flag(tr, 1 << i, 0);
7190         }
7191
7192         tracing_set_nop(tr);
7193         event_trace_del_tracer(tr);
7194         ftrace_clear_pids(tr);
7195         ftrace_destroy_function_files(tr);
7196         tracefs_remove_recursive(tr->dir);
7197         free_trace_buffers(tr);
7198
7199         for (i = 0; i < tr->nr_topts; i++) {
7200                 kfree(tr->topts[i].topts);
7201         }
7202         kfree(tr->topts);
7203
7204         free_cpumask_var(tr->tracing_cpumask);
7205         kfree(tr->name);
7206         kfree(tr);
7207
7208         ret = 0;
7209
7210  out_unlock:
7211         mutex_unlock(&trace_types_lock);
7212
7213         return ret;
7214 }
7215
7216 static __init void create_trace_instances(struct dentry *d_tracer)
7217 {
7218         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7219                                                          instance_mkdir,
7220                                                          instance_rmdir);
7221         if (WARN_ON(!trace_instance_dir))
7222                 return;
7223 }
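/*
 * A usage sketch for the directory registered above (the instance name is
 * a placeholder): creating or removing a subdirectory of "instances" is
 * routed by tracefs to instance_mkdir()/instance_rmdir(), which allocate
 * or tear down a complete trace_array:
 *
 *      # mkdir instances/foo
 *      # rmdir instances/foo
 */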
7224
7225 static void
7226 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7227 {
7228         int cpu;
7229
7230         trace_create_file("available_tracers", 0444, d_tracer,
7231                         tr, &show_traces_fops);
7232
7233         trace_create_file("current_tracer", 0644, d_tracer,
7234                         tr, &set_tracer_fops);
7235
7236         trace_create_file("tracing_cpumask", 0644, d_tracer,
7237                           tr, &tracing_cpumask_fops);
7238
7239         trace_create_file("trace_options", 0644, d_tracer,
7240                           tr, &tracing_iter_fops);
7241
7242         trace_create_file("trace", 0644, d_tracer,
7243                           tr, &tracing_fops);
7244
7245         trace_create_file("trace_pipe", 0444, d_tracer,
7246                           tr, &tracing_pipe_fops);
7247
7248         trace_create_file("buffer_size_kb", 0644, d_tracer,
7249                           tr, &tracing_entries_fops);
7250
7251         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7252                           tr, &tracing_total_entries_fops);
7253
7254         trace_create_file("free_buffer", 0200, d_tracer,
7255                           tr, &tracing_free_buffer_fops);
7256
7257         trace_create_file("trace_marker", 0220, d_tracer,
7258                           tr, &tracing_mark_fops);
7259
7260         trace_create_file("trace_clock", 0644, d_tracer, tr,
7261                           &trace_clock_fops);
7262
7263         trace_create_file("tracing_on", 0644, d_tracer,
7264                           tr, &rb_simple_fops);
7265
7266         create_trace_options_dir(tr);
7267
7268 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7269         trace_create_file("tracing_max_latency", 0644, d_tracer,
7270                         &tr->max_latency, &tracing_max_lat_fops);
7271 #endif
7272
7273         if (ftrace_create_function_files(tr, d_tracer))
7274                 WARN(1, "Could not allocate function filter files");
7275
7276 #ifdef CONFIG_TRACER_SNAPSHOT
7277         trace_create_file("snapshot", 0644, d_tracer,
7278                           tr, &snapshot_fops);
7279 #endif
7280
7281         for_each_tracing_cpu(cpu)
7282                 tracing_init_tracefs_percpu(tr, cpu);
7283
7284         ftrace_init_tracefs(tr, d_tracer);
7285 }
7286
7287 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7288 {
7289         struct vfsmount *mnt;
7290         struct file_system_type *type;
7291
7292         /*
7293          * To maintain backward compatibility for tools that mount
7294          * debugfs to get to the tracing facility, tracefs is automatically
7295          * mounted to the debugfs/tracing directory.
7296          */
7297         type = get_fs_type("tracefs");
7298         if (!type)
7299                 return NULL;
7300         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7301         put_filesystem(type);
7302         if (IS_ERR(mnt))
7303                 return NULL;
7304         mntget(mnt);
7305
7306         return mnt;
7307 }
7308
7309 /**
7310  * tracing_init_dentry - initialize top level trace array
7311  *
7312  * This is called when creating files or directories in the tracing
7313  * directory. It is called via fs_initcall() by any of the boot up code
7314  * and expects to return the dentry of the top level tracing directory.
7315  */
7316 struct dentry *tracing_init_dentry(void)
7317 {
7318         struct trace_array *tr = &global_trace;
7319
7320         /* The top level trace array uses NULL as parent */
7321         if (tr->dir)
7322                 return NULL;
7323
7324         if (WARN_ON(!tracefs_initialized()) ||
7325                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7326                  WARN_ON(!debugfs_initialized())))
7327                 return ERR_PTR(-ENODEV);
7328
7329         /*
7330          * As there may still be users that expect the tracing
7331          * files to exist in debugfs/tracing, we must automount
7332          * the tracefs file system there, so older tools still
7333          * work with the newer kernel.
7334          */
7335         tr->dir = debugfs_create_automount("tracing", NULL,
7336                                            trace_automount, NULL);
7337         if (!tr->dir) {
7338                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7339                 return ERR_PTR(-ENOMEM);
7340         }
7341
7342         return NULL;
7343 }
7344
7345 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7346 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7347
7348 static void __init trace_enum_init(void)
7349 {
7350         int len;
7351
7352         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7353         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7354 }
7355
7356 #ifdef CONFIG_MODULES
7357 static void trace_module_add_enums(struct module *mod)
7358 {
7359         if (!mod->num_trace_enums)
7360                 return;
7361
7362         /*
7363          * Modules with bad taint do not have events created, do
7364          * not bother with enums either.
7365          */
7366         if (trace_module_has_bad_taint(mod))
7367                 return;
7368
7369         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7370 }
7371
7372 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7373 static void trace_module_remove_enums(struct module *mod)
7374 {
7375         union trace_enum_map_item *map;
7376         union trace_enum_map_item **last = &trace_enum_maps;
7377
7378         if (!mod->num_trace_enums)
7379                 return;
7380
7381         mutex_lock(&trace_enum_mutex);
7382
7383         map = trace_enum_maps;
7384
7385         while (map) {
7386                 if (map->head.mod == mod)
7387                         break;
7388                 map = trace_enum_jmp_to_tail(map);
7389                 last = &map->tail.next;
7390                 map = map->tail.next;
7391         }
7392         if (!map)
7393                 goto out;
7394
7395         *last = trace_enum_jmp_to_tail(map)->tail.next;
7396         kfree(map);
7397  out:
7398         mutex_unlock(&trace_enum_mutex);
7399 }
7400 #else
7401 static inline void trace_module_remove_enums(struct module *mod) { }
7402 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7403
7404 static int trace_module_notify(struct notifier_block *self,
7405                                unsigned long val, void *data)
7406 {
7407         struct module *mod = data;
7408
7409         switch (val) {
7410         case MODULE_STATE_COMING:
7411                 trace_module_add_enums(mod);
7412                 break;
7413         case MODULE_STATE_GOING:
7414                 trace_module_remove_enums(mod);
7415                 break;
7416         }
7417
7418         return 0;
7419 }
7420
7421 static struct notifier_block trace_module_nb = {
7422         .notifier_call = trace_module_notify,
7423         .priority = 0,
7424 };
7425 #endif /* CONFIG_MODULES */
7426
7427 static __init int tracer_init_tracefs(void)
7428 {
7429         struct dentry *d_tracer;
7430
7431         trace_access_lock_init();
7432
7433         d_tracer = tracing_init_dentry();
7434         if (IS_ERR(d_tracer))
7435                 return 0;
7436
7437         init_tracer_tracefs(&global_trace, d_tracer);
7438         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7439
7440         trace_create_file("tracing_thresh", 0644, d_tracer,
7441                         &global_trace, &tracing_thresh_fops);
7442
7443         trace_create_file("README", 0444, d_tracer,
7444                         NULL, &tracing_readme_fops);
7445
7446         trace_create_file("saved_cmdlines", 0444, d_tracer,
7447                         NULL, &tracing_saved_cmdlines_fops);
7448
7449         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7450                           NULL, &tracing_saved_cmdlines_size_fops);
7451
7452         trace_enum_init();
7453
7454         trace_create_enum_file(d_tracer);
7455
7456 #ifdef CONFIG_MODULES
7457         register_module_notifier(&trace_module_nb);
7458 #endif
7459
7460 #ifdef CONFIG_DYNAMIC_FTRACE
7461         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7462                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7463 #endif
7464
7465         create_trace_instances(d_tracer);
7466
7467         update_tracer_options(&global_trace);
7468
7469         return 0;
7470 }
7471
7472 static int trace_panic_handler(struct notifier_block *this,
7473                                unsigned long event, void *unused)
7474 {
7475         if (ftrace_dump_on_oops)
7476                 ftrace_dump(ftrace_dump_on_oops);
7477         return NOTIFY_OK;
7478 }
7479
7480 static struct notifier_block trace_panic_notifier = {
7481         .notifier_call  = trace_panic_handler,
7482         .next           = NULL,
7483         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7484 };
7485
7486 static int trace_die_handler(struct notifier_block *self,
7487                              unsigned long val,
7488                              void *data)
7489 {
7490         switch (val) {
7491         case DIE_OOPS:
7492                 if (ftrace_dump_on_oops)
7493                         ftrace_dump(ftrace_dump_on_oops);
7494                 break;
7495         default:
7496                 break;
7497         }
7498         return NOTIFY_OK;
7499 }
7500
7501 static struct notifier_block trace_die_notifier = {
7502         .notifier_call = trace_die_handler,
7503         .priority = 200
7504 };
7505
7506 /*
7507  * printk is set to a max of 1024; we really don't need it that big.
7508  * Nothing should be printing 1000 characters anyway.
7509  */
7510 #define TRACE_MAX_PRINT         1000
7511
7512 /*
7513  * Define here KERN_TRACE so that we have one place to modify
7514  * it if we decide to change what log level the ftrace dump
7515  * should be at.
7516  */
7517 #define KERN_TRACE              KERN_EMERG
7518
7519 void
7520 trace_printk_seq(struct trace_seq *s)
7521 {
7522         /* Probably should print a warning here. */
7523         if (s->seq.len >= TRACE_MAX_PRINT)
7524                 s->seq.len = TRACE_MAX_PRINT;
7525
7526         /*
7527          * More paranoid code. Although the buffer size is set to
7528          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7529          * an extra layer of protection.
7530          */
7531         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7532                 s->seq.len = s->seq.size - 1;
7533
7534         /* should be nul-terminated, but we are paranoid. */
7535         s->buffer[s->seq.len] = 0;
7536
7537         printk(KERN_TRACE "%s", s->buffer);
7538
7539         trace_seq_init(s);
7540 }
7541
7542 void trace_init_global_iter(struct trace_iterator *iter)
7543 {
7544         iter->tr = &global_trace;
7545         iter->trace = iter->tr->current_trace;
7546         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7547         iter->trace_buffer = &global_trace.trace_buffer;
7548
7549         if (iter->trace && iter->trace->open)
7550                 iter->trace->open(iter);
7551
7552         /* Annotate start of buffers if we had overruns */
7553         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7554                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7555
7556         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7557         if (trace_clocks[iter->tr->clock_id].in_ns)
7558                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7559 }
7560
7561 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7562 {
7563         /* use static because iter can be a bit big for the stack */
7564         static struct trace_iterator iter;
7565         static atomic_t dump_running;
7566         struct trace_array *tr = &global_trace;
7567         unsigned int old_userobj;
7568         unsigned long flags;
7569         int cnt = 0, cpu;
7570
7571         /* Only allow one dump user at a time. */
7572         if (atomic_inc_return(&dump_running) != 1) {
7573                 atomic_dec(&dump_running);
7574                 return;
7575         }
7576
7577         /*
7578          * Always turn off tracing when we dump.
7579          * We don't need to show trace output of what happens
7580          * between multiple crashes.
7581          *
7582          * If the user does a sysrq-z, then they can re-enable
7583          * tracing with echo 1 > tracing_on.
7584          */
7585         tracing_off();
7586
7587         local_irq_save(flags);
7588
7589         /* Simulate the iterator */
7590         trace_init_global_iter(&iter);
7591
7592         for_each_tracing_cpu(cpu) {
7593                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7594         }
7595
7596         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7597
7598         /* don't look at user memory in panic mode */
7599         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7600
7601         switch (oops_dump_mode) {
7602         case DUMP_ALL:
7603                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7604                 break;
7605         case DUMP_ORIG:
7606                 iter.cpu_file = raw_smp_processor_id();
7607                 break;
7608         case DUMP_NONE:
7609                 goto out_enable;
7610         default:
7611                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7612                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7613         }
7614
7615         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7616
7617         /* Did function tracer already get disabled? */
7618         if (ftrace_is_dead()) {
7619                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7620                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7621         }
7622
7623         /*
7624          * We need to stop all tracing on all CPUs to read
7625          * the next buffer. This is a bit expensive, but is
7626          * not done often. We fill in all that we can read,
7627          * and then release the locks again.
7628          */
7629
7630         while (!trace_empty(&iter)) {
7631
7632                 if (!cnt)
7633                         printk(KERN_TRACE "---------------------------------\n");
7634
7635                 cnt++;
7636
7637                 trace_iterator_reset(&iter);
7638                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7639
7640                 if (trace_find_next_entry_inc(&iter) != NULL) {
7641                         int ret;
7642
7643                         ret = print_trace_line(&iter);
7644                         if (ret != TRACE_TYPE_NO_CONSUME)
7645                                 trace_consume(&iter);
7646                 }
7647                 touch_nmi_watchdog();
7648
7649                 trace_printk_seq(&iter.seq);
7650         }
7651
7652         if (!cnt)
7653                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7654         else
7655                 printk(KERN_TRACE "---------------------------------\n");
7656
7657  out_enable:
7658         tr->trace_flags |= old_userobj;
7659
7660         for_each_tracing_cpu(cpu) {
7661                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7662         }
7663         atomic_dec(&dump_running);
7664         local_irq_restore(flags);
7665 }
7666 EXPORT_SYMBOL_GPL(ftrace_dump);
7667
7668 __init static int tracer_alloc_buffers(void)
7669 {
7670         int ring_buf_size;
7671         int ret = -ENOMEM;
7672
7673         /*
7674          * Make sure we don't accidentally add more trace options
7675          * than we have bits for.
7676          */
7677         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7678
7679         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7680                 goto out;
7681
7682         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7683                 goto out_free_buffer_mask;
7684
7685         /* Only allocate trace_printk buffers if a trace_printk exists */
7686         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7687                 /* Must be called before global_trace.buffer is allocated */
7688                 trace_printk_init_buffers();
7689
7690         /* To save memory, keep the ring buffer size to its minimum */
7691         if (ring_buffer_expanded)
7692                 ring_buf_size = trace_buf_size;
7693         else
7694                 ring_buf_size = 1;
7695
7696         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7697         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7698
7699         raw_spin_lock_init(&global_trace.start_lock);
7700
7701         /* Used for event triggers */
7702         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7703         if (!temp_buffer)
7704                 goto out_free_cpumask;
7705
7706         if (trace_create_savedcmd() < 0)
7707                 goto out_free_temp_buffer;
7708
7709         /* TODO: make the number of buffers hot pluggable with CPUS */
7710         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7711                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7712                 WARN_ON(1);
7713                 goto out_free_savedcmd;
7714         }
7715
7716         if (global_trace.buffer_disabled)
7717                 tracing_off();
7718
7719         if (trace_boot_clock) {
7720                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7721                 if (ret < 0)
7722                         pr_warn("Trace clock %s not defined, going back to default\n",
7723                                 trace_boot_clock);
7724         }
7725
7726         /*
7727          * register_tracer() might reference current_trace, so it
7728          * needs to be set before we register anything. This is
7729          * just a bootstrap of current_trace anyway.
7730          */
7731         global_trace.current_trace = &nop_trace;
7732
7733         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7734
7735         ftrace_init_global_array_ops(&global_trace);
7736
7737         init_trace_flags_index(&global_trace);
7738
7739         register_tracer(&nop_trace);
7740
7741         /* All seems OK, enable tracing */
7742         tracing_disabled = 0;
7743
7744         atomic_notifier_chain_register(&panic_notifier_list,
7745                                        &trace_panic_notifier);
7746
7747         register_die_notifier(&trace_die_notifier);
7748
7749         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7750
7751         INIT_LIST_HEAD(&global_trace.systems);
7752         INIT_LIST_HEAD(&global_trace.events);
7753         list_add(&global_trace.list, &ftrace_trace_arrays);
7754
7755         apply_trace_boot_options();
7756
7757         register_snapshot_cmd();
7758
7759         return 0;
7760
7761 out_free_savedcmd:
7762         free_saved_cmdlines_buffer(savedcmd);
7763 out_free_temp_buffer:
7764         ring_buffer_free(temp_buffer);
7765 out_free_cpumask:
7766         free_cpumask_var(global_trace.tracing_cpumask);
7767 out_free_buffer_mask:
7768         free_cpumask_var(tracing_buffer_mask);
7769 out:
7770         return ret;
7771 }
7772
7773 void __init trace_init(void)
7774 {
7775         if (tracepoint_printk) {
7776                 tracepoint_print_iter =
7777                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7778                 if (WARN_ON(!tracepoint_print_iter))
7779                         tracepoint_printk = 0;
7780         }
7781         tracer_alloc_buffers();
7782         trace_event_init();
7783 }
7784
7785 __init static int clear_boot_tracer(void)
7786 {
7787         /*
7788          * The buffer that default_bootup_tracer points to is in an init section.
7789          * This function is called at late_initcall time. If we did not
7790          * find the boot tracer, then clear it out, to prevent
7791          * later registration from accessing the buffer that is
7792          * about to be freed.
7793          */
7794         if (!default_bootup_tracer)
7795                 return 0;
7796
7797         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7798                default_bootup_tracer);
7799         default_bootup_tracer = NULL;
7800
7801         return 0;
7802 }
7803
7804 fs_initcall(tracer_init_tracefs);
7805 late_initcall_sync(clear_boot_tracer);