GNU Linux-libre 4.9.332-gnu1
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will peek into the ring buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * from "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
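/*
 * Illustrative picture of one saved block, derived from the comment above
 * (an explanatory sketch, not additional state in this file):
 *
 *   trace_enum_maps --> [ head | map[0] | map[1] | ... | map[len-1] | tail ]
 *
 * where head.length holds len, head.mod identifies the owning module for
 * module maps, and tail.next points to the head of the next saved block,
 * if any.
 */
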
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         /* Ignore the "tp_printk_stop_on_boot" param */
232         if (*str == '_')
233                 return 0;
234
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
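/*
 * Illustrative kernel command line combining the boot parameters handled
 * above (the parameter names are the ones registered via __setup(); the
 * particular values are only an example):
 *
 *   ftrace=function ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *   alloc_snapshot trace_options=sym-addr trace_clock=global tp_printk
 */
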
241 unsigned long long ns2usecs(cycle_t nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         TRACE_ITER_EVENT_FORK
263
264 /*
265  * The global_trace is the descriptor that holds the tracing
266  * buffers for the live tracing. For each CPU, it contains
267  * a linked list of pages that will store trace entries. The
268  * page descriptor of the pages in the memory is used to hold
269  * the linked list by linking the lru item in the page descriptor
270  * to each of the pages in the buffer per CPU.
271  *
272  * For each active CPU there is a data field that holds the
273  * pages for the buffer for that CPU. Each CPU has the same number
274  * of pages allocated for its buffer.
275  */
276 static struct trace_array global_trace = {
277         .trace_flags = TRACE_DEFAULT_FLAGS,
278 };
279
280 LIST_HEAD(ftrace_trace_arrays);
281
282 int trace_array_get(struct trace_array *this_tr)
283 {
284         struct trace_array *tr;
285         int ret = -ENODEV;
286
287         mutex_lock(&trace_types_lock);
288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
289                 if (tr == this_tr) {
290                         tr->ref++;
291                         ret = 0;
292                         break;
293                 }
294         }
295         mutex_unlock(&trace_types_lock);
296
297         return ret;
298 }
299
300 static void __trace_array_put(struct trace_array *this_tr)
301 {
302         WARN_ON(!this_tr->ref);
303         this_tr->ref--;
304 }
305
306 void trace_array_put(struct trace_array *this_tr)
307 {
308         mutex_lock(&trace_types_lock);
309         __trace_array_put(this_tr);
310         mutex_unlock(&trace_types_lock);
311 }
312
313 int call_filter_check_discard(struct trace_event_call *call, void *rec,
314                               struct ring_buffer *buffer,
315                               struct ring_buffer_event *event)
316 {
317         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
318             !filter_match_preds(call->filter, rec)) {
319                 __trace_event_discard_commit(buffer, event);
320                 return 1;
321         }
322
323         return 0;
324 }
325
326 void trace_free_pid_list(struct trace_pid_list *pid_list)
327 {
328         vfree(pid_list->pids);
329         kfree(pid_list);
330 }
331
332 /**
333  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
334  * @filtered_pids: The list of pids to check
335  * @search_pid: The PID to find in @filtered_pids
336  *
337  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
338  */
339 bool
340 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
341 {
342         /*
343          * If pid_max changed after filtered_pids was created, we
344          * by default ignore all pids greater than the previous pid_max.
345          */
346         if (search_pid >= filtered_pids->pid_max)
347                 return false;
348
349         return test_bit(search_pid, filtered_pids->pids);
350 }
351
352 /**
353  * trace_ignore_this_task - should a task be ignored for tracing
354  * @filtered_pids: The list of pids to check
355  * @task: The task that should be ignored if not filtered
356  *
357  * Checks if @task should be traced or not from @filtered_pids.
358  * Returns true if @task should *NOT* be traced.
359  * Returns false if @task should be traced.
360  */
361 bool
362 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
363 {
364         /*
365          * Return false, because if filtered_pids does not exist,
366          * all pids are good to trace.
367          */
368         if (!filtered_pids)
369                 return false;
370
371         return !trace_find_filtered_pid(filtered_pids, task->pid);
372 }
373
374 /**
375  * trace_filter_add_remove_task - Add or remove a task from a pid_list
376  * @pid_list: The list to modify
377  * @self: The current task for fork or NULL for exit
378  * @task: The task to add or remove
379  *
380  * When adding a task, if @self is defined, the task is only added if @self
381  * is also included in @pid_list. This happens on fork, and tasks should
382  * only be added when the parent is listed. If @self is NULL, then the
383  * @task pid will be removed from the list, which happens on exit
384  * of a task.
385  */
386 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
387                                   struct task_struct *self,
388                                   struct task_struct *task)
389 {
390         if (!pid_list)
391                 return;
392
393         /* For forks, we only add if the forking task is listed */
394         if (self) {
395                 if (!trace_find_filtered_pid(pid_list, self->pid))
396                         return;
397         }
398
399         /* Sorry, but we don't support pid_max changing after setting */
400         if (task->pid >= pid_list->pid_max)
401                 return;
402
403         /* "self" is set for forks, and NULL for exits */
404         if (self)
405                 set_bit(task->pid, pid_list->pids);
406         else
407                 clear_bit(task->pid, pid_list->pids);
408 }
409
410 /**
411  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
412  * @pid_list: The pid list to show
413  * @v: The last pid that was shown (+1 of the actual pid so that zero can be displayed)
414  * @pos: The position of the file
415  *
416  * This is used by the seq_file "next" operation to iterate the pids
417  * listed in a trace_pid_list structure.
418  *
419  * Returns the pid+1 as we want to display pid of zero, but NULL would
420  * stop the iteration.
421  */
422 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
423 {
424         unsigned long pid = (unsigned long)v;
425
426         (*pos)++;
427
428         /* pid already is +1 of the actual previous bit */
429         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
430
431         /* Return pid + 1 to allow zero to be represented */
432         if (pid < pid_list->pid_max)
433                 return (void *)(pid + 1);
434
435         return NULL;
436 }
437
438 /**
439  * trace_pid_start - Used for seq_file to start reading pid lists
440  * @pid_list: The pid list to show
441  * @pos: The position of the file
442  *
443  * This is used by seq_file "start" operation to start the iteration
444  * of listing pids.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
450 {
451         unsigned long pid;
452         loff_t l = 0;
453
454         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
455         if (pid >= pid_list->pid_max)
456                 return NULL;
457
458         /* Return pid + 1 so that zero can be the exit value */
459         for (pid++; pid && l < *pos;
460              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
461                 ;
462         return (void *)pid;
463 }
464
465 /**
466  * trace_pid_show - show the current pid in seq_file processing
467  * @m: The seq_file structure to write into
468  * @v: A void pointer of the pid (+1) value to display
469  *
470  * Can be directly used by seq_file operations to display the current
471  * pid value.
472  */
473 int trace_pid_show(struct seq_file *m, void *v)
474 {
475         unsigned long pid = (unsigned long)v - 1;
476
477         seq_printf(m, "%lu\n", pid);
478         return 0;
479 }
480
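/*
 * Minimal sketch (not code from this file) of how the three helpers above
 * are meant to plug into a seq_file. Real users wrap start/next so they can
 * look up their own pid list first; my_pid_list, my_pid_start, my_pid_next
 * and my_pid_stop are placeholder names, and my_pid_stop would release any
 * locking taken by my_pid_start:
 *
 *         static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *         {
 *                 return trace_pid_start(my_pid_list, pos);
 *         }
 *
 *         static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *         {
 *                 return trace_pid_next(my_pid_list, v, pos);
 *         }
 *
 *         static const struct seq_operations my_pid_sops = {
 *                 .start = my_pid_start,
 *                 .next  = my_pid_next,
 *                 .stop  = my_pid_stop,
 *                 .show  = trace_pid_show,
 *         };
 */
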
481 /* 128 should be much more than enough */
482 #define PID_BUF_SIZE            127
483
484 int trace_pid_write(struct trace_pid_list *filtered_pids,
485                     struct trace_pid_list **new_pid_list,
486                     const char __user *ubuf, size_t cnt)
487 {
488         struct trace_pid_list *pid_list;
489         struct trace_parser parser;
490         unsigned long val;
491         int nr_pids = 0;
492         ssize_t read = 0;
493         ssize_t ret = 0;
494         loff_t pos;
495         pid_t pid;
496
497         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
498                 return -ENOMEM;
499
500         /*
501          * Always create a new array. The write is an all-or-nothing
502          * operation: a new array is built from the user's input, and
503          * only if the whole operation succeeds does it replace the
504          * current list. If it fails, the current list is not modified.
505          */
506         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
507         if (!pid_list) {
508                 trace_parser_put(&parser);
509                 return -ENOMEM;
510         }
511
512         pid_list->pid_max = READ_ONCE(pid_max);
513
514         /* Only truncating will shrink pid_max */
515         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
516                 pid_list->pid_max = filtered_pids->pid_max;
517
518         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
519         if (!pid_list->pids) {
520                 trace_parser_put(&parser);
521                 kfree(pid_list);
522                 return -ENOMEM;
523         }
524
525         if (filtered_pids) {
526                 /* copy the current bits to the new max */
527                 for_each_set_bit(pid, filtered_pids->pids,
528                                  filtered_pids->pid_max) {
529                         set_bit(pid, pid_list->pids);
530                         nr_pids++;
531                 }
532         }
533
534         while (cnt > 0) {
535
536                 pos = 0;
537
538                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
539                 if (ret < 0 || !trace_parser_loaded(&parser))
540                         break;
541
542                 read += ret;
543                 ubuf += ret;
544                 cnt -= ret;
545
546                 parser.buffer[parser.idx] = 0;
547
548                 ret = -EINVAL;
549                 if (kstrtoul(parser.buffer, 0, &val))
550                         break;
551                 if (val >= pid_list->pid_max)
552                         break;
553
554                 pid = (pid_t)val;
555
556                 set_bit(pid, pid_list->pids);
557                 nr_pids++;
558
559                 trace_parser_clear(&parser);
560                 ret = 0;
561         }
562         trace_parser_put(&parser);
563
564         if (ret < 0) {
565                 trace_free_pid_list(pid_list);
566                 return ret;
567         }
568
569         if (!nr_pids) {
570                 /* Cleared the list of pids */
571                 trace_free_pid_list(pid_list);
572                 read = ret;
573                 pid_list = NULL;
574         }
575
576         *new_pid_list = pid_list;
577
578         return read;
579 }
580
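/*
 * Usage note (an assumption about the callers, not something defined here):
 * trace_pid_write() backs the pid filter files such as set_event_pid, so
 *
 *         echo 123 456 > /sys/kernel/debug/tracing/set_event_pid
 *
 * replaces the filter with those two pids, while a write containing no pids
 * clears it (nr_pids == 0 above frees the new list and leaves *new_pid_list
 * set to NULL).
 */
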
581 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
582 {
583         u64 ts;
584
585         /* Early boot up does not have a buffer yet */
586         if (!buf->buffer)
587                 return trace_clock_local();
588
589         ts = ring_buffer_time_stamp(buf->buffer, cpu);
590         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
591
592         return ts;
593 }
594
595 cycle_t ftrace_now(int cpu)
596 {
597         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
598 }
599
600 /**
601  * tracing_is_enabled - Show if global_trace has been disabled
602  *
603  * Shows if the global trace has been enabled or not. It uses the
604  * mirror flag "buffer_disabled" so it can be used in fast paths such as
605  * the irqsoff tracer. But it may be inaccurate due to races. If you
606  * need to know the accurate state, use tracing_is_on() which is a little
607  * slower, but accurate.
608  */
609 int tracing_is_enabled(void)
610 {
611         /*
612          * For quick access (irqsoff uses this in fast path), just
613          * return the mirror variable of the state of the ring buffer.
614          * It's a little racy, but we don't really care.
615          */
616         smp_rmb();
617         return !global_trace.buffer_disabled;
618 }
619
620 /*
621  * trace_buf_size is the size in bytes that is allocated
622  * for a buffer. Note, the number of bytes is always rounded
623  * to page size.
624  *
625  * This number is purposely set to a low number of 16384.
626  * If a dump on oops happens, it is much appreciated not to have to
627  * wait for all that output. In any case, this is configurable at
628  * both boot time and run time.
629  */
630 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
631
632 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
633
634 /* trace_types holds a linked list of available tracers. */
635 static struct tracer            *trace_types __read_mostly;
636
637 /*
638  * trace_types_lock is used to protect the trace_types list.
639  */
640 DEFINE_MUTEX(trace_types_lock);
641
642 /*
643  * serialize the access of the ring buffer
644  *
645  * The ring buffer serializes readers, but that is only low level protection.
646  * The validity of the events (returned by ring_buffer_peek() etc.)
647  * is not protected by the ring buffer.
648  *
649  * The content of events may become garbage if we allow another process to
650  * consume these events concurrently:
651  *   A) the page holding the consumed events may become a normal page
652  *      (not a reader page) in the ring buffer, and then be rewritten
653  *      by the event producer.
654  *   B) the page holding the consumed events may become a page for
655  *      splice_read, and then be returned to the system.
656  *
657  * These primitives allow multiple processes to access different per-cpu
658  * ring buffers concurrently.
659  *
660  * They do not distinguish read-only from read-consume access;
661  * multiple read-only accesses are also serialized.
662  */
663
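/*
 * Typical usage of the helpers below (sketch): a reader of a single cpu
 * buffer does
 *
 *         trace_access_lock(cpu);
 *         ... consume events from that cpu's ring buffer ...
 *         trace_access_unlock(cpu);
 *
 * while a reader that touches every cpu buffer passes RING_BUFFER_ALL_CPUS
 * and thereby excludes all per-cpu readers.
 */
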
664 #ifdef CONFIG_SMP
665 static DECLARE_RWSEM(all_cpu_access_lock);
666 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
667
668 static inline void trace_access_lock(int cpu)
669 {
670         if (cpu == RING_BUFFER_ALL_CPUS) {
671                 /* gain it for accessing the whole ring buffer. */
672                 down_write(&all_cpu_access_lock);
673         } else {
674                 /* gain it for accessing a cpu ring buffer. */
675
676                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
677                 down_read(&all_cpu_access_lock);
678
679                 /* Secondly block other access to this @cpu ring buffer. */
680                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
681         }
682 }
683
684 static inline void trace_access_unlock(int cpu)
685 {
686         if (cpu == RING_BUFFER_ALL_CPUS) {
687                 up_write(&all_cpu_access_lock);
688         } else {
689                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
690                 up_read(&all_cpu_access_lock);
691         }
692 }
693
694 static inline void trace_access_lock_init(void)
695 {
696         int cpu;
697
698         for_each_possible_cpu(cpu)
699                 mutex_init(&per_cpu(cpu_access_lock, cpu));
700 }
701
702 #else
703
704 static DEFINE_MUTEX(access_lock);
705
706 static inline void trace_access_lock(int cpu)
707 {
708         (void)cpu;
709         mutex_lock(&access_lock);
710 }
711
712 static inline void trace_access_unlock(int cpu)
713 {
714         (void)cpu;
715         mutex_unlock(&access_lock);
716 }
717
718 static inline void trace_access_lock_init(void)
719 {
720 }
721
722 #endif
723
724 #ifdef CONFIG_STACKTRACE
725 static void __ftrace_trace_stack(struct ring_buffer *buffer,
726                                  unsigned long flags,
727                                  int skip, int pc, struct pt_regs *regs);
728 static inline void ftrace_trace_stack(struct trace_array *tr,
729                                       struct ring_buffer *buffer,
730                                       unsigned long flags,
731                                       int skip, int pc, struct pt_regs *regs);
732
733 #else
734 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
735                                         unsigned long flags,
736                                         int skip, int pc, struct pt_regs *regs)
737 {
738 }
739 static inline void ftrace_trace_stack(struct trace_array *tr,
740                                       struct ring_buffer *buffer,
741                                       unsigned long flags,
742                                       int skip, int pc, struct pt_regs *regs)
743 {
744 }
745
746 #endif
747
748 static void tracer_tracing_on(struct trace_array *tr)
749 {
750         if (tr->trace_buffer.buffer)
751                 ring_buffer_record_on(tr->trace_buffer.buffer);
752         /*
753          * This flag is looked at when buffers haven't been allocated
754          * yet, or by some tracers (like irqsoff), that just want to
755          * races where it gets disabled while we still do a record.
756          * races of where it gets disabled but we still do a record.
757          * As the check is in the fast path of the tracers, it is more
758          * important to be fast than accurate.
759          */
760         tr->buffer_disabled = 0;
761         /* Make the flag seen by readers */
762         smp_wmb();
763 }
764
765 /**
766  * tracing_on - enable tracing buffers
767  *
768  * This function enables tracing buffers that may have been
769  * disabled with tracing_off.
770  */
771 void tracing_on(void)
772 {
773         tracer_tracing_on(&global_trace);
774 }
775 EXPORT_SYMBOL_GPL(tracing_on);
776
777 /**
778  * __trace_puts - write a constant string into the trace buffer.
779  * @ip:    The address of the caller
780  * @str:   The constant string to write
781  * @size:  The size of the string.
782  */
783 int __trace_puts(unsigned long ip, const char *str, int size)
784 {
785         struct ring_buffer_event *event;
786         struct ring_buffer *buffer;
787         struct print_entry *entry;
788         unsigned long irq_flags;
789         int alloc;
790         int pc;
791
792         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
793                 return 0;
794
795         pc = preempt_count();
796
797         if (unlikely(tracing_selftest_running || tracing_disabled))
798                 return 0;
799
800         alloc = sizeof(*entry) + size + 2; /* possible \n added */
801
802         local_save_flags(irq_flags);
803         buffer = global_trace.trace_buffer.buffer;
804         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
805                                           irq_flags, pc);
806         if (!event)
807                 return 0;
808
809         entry = ring_buffer_event_data(event);
810         entry->ip = ip;
811
812         memcpy(&entry->buf, str, size);
813
814         /* Add a newline if necessary */
815         if (entry->buf[size - 1] != '\n') {
816                 entry->buf[size] = '\n';
817                 entry->buf[size + 1] = '\0';
818         } else
819                 entry->buf[size] = '\0';
820
821         __buffer_unlock_commit(buffer, event);
822         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
823
824         return size;
825 }
826 EXPORT_SYMBOL_GPL(__trace_puts);
827
828 /**
829  * __trace_bputs - write the pointer to a constant string into trace buffer
830  * @ip:    The address of the caller
831  * @str:   The constant string to write to the buffer to
832  */
833 int __trace_bputs(unsigned long ip, const char *str)
834 {
835         struct ring_buffer_event *event;
836         struct ring_buffer *buffer;
837         struct bputs_entry *entry;
838         unsigned long irq_flags;
839         int size = sizeof(struct bputs_entry);
840         int pc;
841
842         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843                 return 0;
844
845         pc = preempt_count();
846
847         if (unlikely(tracing_selftest_running || tracing_disabled))
848                 return 0;
849
850         local_save_flags(irq_flags);
851         buffer = global_trace.trace_buffer.buffer;
852         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
853                                           irq_flags, pc);
854         if (!event)
855                 return 0;
856
857         entry = ring_buffer_event_data(event);
858         entry->ip                       = ip;
859         entry->str                      = str;
860
861         __buffer_unlock_commit(buffer, event);
862         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
863
864         return 1;
865 }
866 EXPORT_SYMBOL_GPL(__trace_bputs);
867
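/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly but
 * go through the trace_puts() macro (see include/linux/kernel.h), e.g.:
 *
 *         trace_puts("reached the fast path\n");
 *
 * which, as this sketch assumes, resolves to __trace_bputs() for string
 * literals and to __trace_puts() otherwise.
 */
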
868 #ifdef CONFIG_TRACER_SNAPSHOT
869 /**
870  * tracing_snapshot - take a snapshot of the current buffer.
871  *
872  * This causes a swap between the snapshot buffer and the current live
873  * tracing buffer. You can use this to take snapshots of the live
874  * trace when some condition is triggered, but continue to trace.
875  *
876  * Note, make sure to allocate the snapshot with either
877  * a tracing_snapshot_alloc(), or by doing it manually
878  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
879  *
880  * If the snapshot buffer is not allocated, it will stop tracing.
881  * Basically making a permanent snapshot.
882  */
883 void tracing_snapshot(void)
884 {
885         struct trace_array *tr = &global_trace;
886         struct tracer *tracer = tr->current_trace;
887         unsigned long flags;
888
889         if (in_nmi()) {
890                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
891                 internal_trace_puts("*** snapshot is being ignored        ***\n");
892                 return;
893         }
894
895         if (!tr->allocated_snapshot) {
896                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
897                 internal_trace_puts("*** stopping trace here!   ***\n");
898                 tracing_off();
899                 return;
900         }
901
902         /* Note, snapshot can not be used when the tracer uses it */
903         if (tracer->use_max_tr) {
904                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
905                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
906                 return;
907         }
908
909         local_irq_save(flags);
910         update_max_tr(tr, current, smp_processor_id());
911         local_irq_restore(flags);
912 }
913 EXPORT_SYMBOL_GPL(tracing_snapshot);
914
915 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
916                                         struct trace_buffer *size_buf, int cpu_id);
917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
918
919 static int alloc_snapshot(struct trace_array *tr)
920 {
921         int ret;
922
923         if (!tr->allocated_snapshot) {
924
925                 /* allocate spare buffer */
926                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
927                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
928                 if (ret < 0)
929                         return ret;
930
931                 tr->allocated_snapshot = true;
932         }
933
934         return 0;
935 }
936
937 static void free_snapshot(struct trace_array *tr)
938 {
939         /*
940          * We don't free the ring buffer; instead, we resize it because
941          * the max_tr ring buffer has some state (e.g. ring->clock) and
942          * we want to preserve it.
943          */
944         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
945         set_buffer_entries(&tr->max_buffer, 1);
946         tracing_reset_online_cpus(&tr->max_buffer);
947         tr->allocated_snapshot = false;
948 }
949
950 /**
951  * tracing_alloc_snapshot - allocate snapshot buffer.
952  *
953  * This only allocates the snapshot buffer if it isn't already
954  * allocated - it doesn't also take a snapshot.
955  *
956  * This is meant to be used in cases where the snapshot buffer needs
957  * to be set up for events that can't sleep but need to be able to
958  * trigger a snapshot.
959  */
960 int tracing_alloc_snapshot(void)
961 {
962         struct trace_array *tr = &global_trace;
963         int ret;
964
965         ret = alloc_snapshot(tr);
966         WARN_ON(ret < 0);
967
968         return ret;
969 }
970 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
971
972 /**
973  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
974  *
975  * This is similar to tracing_snapshot(), but it will allocate the
976  * snapshot buffer if it isn't already allocated. Use this only
977  * where it is safe to sleep, as the allocation may sleep.
978  *
979  * This causes a swap between the snapshot buffer and the current live
980  * tracing buffer. You can use this to take snapshots of the live
981  * trace when some condition is triggered, but continue to trace.
982  */
983 void tracing_snapshot_alloc(void)
984 {
985         int ret;
986
987         ret = tracing_alloc_snapshot();
988         if (ret < 0)
989                 return;
990
991         tracing_snapshot();
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
994 #else
995 void tracing_snapshot(void)
996 {
997         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000 int tracing_alloc_snapshot(void)
1001 {
1002         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1003         return -ENODEV;
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1006 void tracing_snapshot_alloc(void)
1007 {
1008         /* Give warning */
1009         tracing_snapshot();
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1012 #endif /* CONFIG_TRACER_SNAPSHOT */
1013
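/*
 * Typical usage of the snapshot API above (sketch; snapshot_ready and
 * hit_condition are placeholders for the caller's own state). Allocate once
 * from a context that may sleep, then snapshot from the hot path:
 *
 *         if (tracing_alloc_snapshot() == 0)
 *                 snapshot_ready = true;
 *         ...
 *         if (snapshot_ready && hit_condition)
 *                 tracing_snapshot();
 */
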
1014 static void tracer_tracing_off(struct trace_array *tr)
1015 {
1016         if (tr->trace_buffer.buffer)
1017                 ring_buffer_record_off(tr->trace_buffer.buffer);
1018         /*
1019          * This flag is looked at when buffers haven't been allocated
1020          * yet, or by some tracers (like irqsoff), that just want to
1021          * know if the ring buffer has been disabled, but it can handle
1022          * races where it gets disabled while we still do a record.
1023          * As the check is in the fast path of the tracers, it is more
1024          * important to be fast than accurate.
1025          */
1026         tr->buffer_disabled = 1;
1027         /* Make the flag seen by readers */
1028         smp_wmb();
1029 }
1030
1031 /**
1032  * tracing_off - turn off tracing buffers
1033  *
1034  * This function stops the tracing buffers from recording data.
1035  * It does not disable any overhead the tracers themselves may
1036  * be causing. This function simply causes all recording to
1037  * the ring buffers to fail.
1038  */
1039 void tracing_off(void)
1040 {
1041         tracer_tracing_off(&global_trace);
1042 }
1043 EXPORT_SYMBOL_GPL(tracing_off);
1044
1045 void disable_trace_on_warning(void)
1046 {
1047         if (__disable_trace_on_warning)
1048                 tracing_off();
1049 }
1050
1051 /**
1052  * tracer_tracing_is_on - show the real state of the ring buffer
1053  * @tr: the trace array whose ring buffer state is queried
1054  *
1055  * Shows the real state of the ring buffer: whether it is enabled or not.
1056  */
1057 int tracer_tracing_is_on(struct trace_array *tr)
1058 {
1059         if (tr->trace_buffer.buffer)
1060                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1061         return !tr->buffer_disabled;
1062 }
1063
1064 /**
1065  * tracing_is_on - show state of ring buffers enabled
1066  */
1067 int tracing_is_on(void)
1068 {
1069         return tracer_tracing_is_on(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_is_on);
1072
1073 static int __init set_buf_size(char *str)
1074 {
1075         unsigned long buf_size;
1076
1077         if (!str)
1078                 return 0;
1079         buf_size = memparse(str, &str);
1080         /*
1081          * nr_entries can not be zero and the startup
1082          * tests require some buffer space. Therefore
1083          * ensure we have at least 4096 bytes of buffer.
1084          */
1085         trace_buf_size = max(4096UL, buf_size);
1086         return 1;
1087 }
1088 __setup("trace_buf_size=", set_buf_size);
1089
1090 static int __init set_tracing_thresh(char *str)
1091 {
1092         unsigned long threshold;
1093         int ret;
1094
1095         if (!str)
1096                 return 0;
1097         ret = kstrtoul(str, 0, &threshold);
1098         if (ret < 0)
1099                 return 0;
1100         tracing_thresh = threshold * 1000;
1101         return 1;
1102 }
1103 __setup("tracing_thresh=", set_tracing_thresh);
1104
1105 unsigned long nsecs_to_usecs(unsigned long nsecs)
1106 {
1107         return nsecs / 1000;
1108 }
1109
1110 /*
1111  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1112  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1113  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1114  * of strings in the order that the enums were defined.
1115  */
1116 #undef C
1117 #define C(a, b) b
1118
1119 /* These must match the bit positions in trace_iterator_flags */
1120 static const char *trace_options[] = {
1121         TRACE_FLAGS
1122         NULL
1123 };
1124
1125 static struct {
1126         u64 (*func)(void);
1127         const char *name;
1128         int in_ns;              /* is this clock in nanoseconds? */
1129 } trace_clocks[] = {
1130         { trace_clock_local,            "local",        1 },
1131         { trace_clock_global,           "global",       1 },
1132         { trace_clock_counter,          "counter",      0 },
1133         { trace_clock_jiffies,          "uptime",       0 },
1134         { trace_clock,                  "perf",         1 },
1135         { ktime_get_mono_fast_ns,       "mono",         1 },
1136         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1137         ARCH_TRACE_CLOCKS
1138 };
1139
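/*
 * The "name" column above is what user space writes to the trace_clock
 * file to select a clock, e.g. (illustrative):
 *
 *         echo global > /sys/kernel/debug/tracing/trace_clock
 */
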
1140 /*
1141  * trace_parser_get_init - gets the buffer for trace parser
1142  */
1143 int trace_parser_get_init(struct trace_parser *parser, int size)
1144 {
1145         memset(parser, 0, sizeof(*parser));
1146
1147         parser->buffer = kmalloc(size, GFP_KERNEL);
1148         if (!parser->buffer)
1149                 return 1;
1150
1151         parser->size = size;
1152         return 0;
1153 }
1154
1155 /*
1156  * trace_parser_put - frees the buffer for trace parser
1157  */
1158 void trace_parser_put(struct trace_parser *parser)
1159 {
1160         kfree(parser->buffer);
1161 }
1162
1163 /*
1164  * trace_get_user - reads the user input string separated by space
1165  * (matched by isspace(ch))
1166  *
1167  * For each string found, the 'struct trace_parser' is updated,
1168  * and the function returns.
1169  *
1170  * Returns number of bytes read.
1171  *
1172  * See kernel/trace/trace.h for 'struct trace_parser' details.
1173  */
1174 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1175         size_t cnt, loff_t *ppos)
1176 {
1177         char ch;
1178         size_t read = 0;
1179         ssize_t ret;
1180
1181         if (!*ppos)
1182                 trace_parser_clear(parser);
1183
1184         ret = get_user(ch, ubuf++);
1185         if (ret)
1186                 goto out;
1187
1188         read++;
1189         cnt--;
1190
1191         /*
1192          * If the parser is not finished with the last write,
1193          * continue reading the user input without skipping spaces.
1194          */
1195         if (!parser->cont) {
1196                 /* skip white space */
1197                 while (cnt && isspace(ch)) {
1198                         ret = get_user(ch, ubuf++);
1199                         if (ret)
1200                                 goto out;
1201                         read++;
1202                         cnt--;
1203                 }
1204
1205                 /* only spaces were written */
1206                 if (isspace(ch)) {
1207                         *ppos += read;
1208                         ret = read;
1209                         goto out;
1210                 }
1211
1212                 parser->idx = 0;
1213         }
1214
1215         /* read the non-space input */
1216         while (cnt && !isspace(ch)) {
1217                 if (parser->idx < parser->size - 1)
1218                         parser->buffer[parser->idx++] = ch;
1219                 else {
1220                         ret = -EINVAL;
1221                         goto out;
1222                 }
1223                 ret = get_user(ch, ubuf++);
1224                 if (ret)
1225                         goto out;
1226                 read++;
1227                 cnt--;
1228         }
1229
1230         /* We either got finished input or we have to wait for another call. */
1231         if (isspace(ch)) {
1232                 parser->buffer[parser->idx] = 0;
1233                 parser->cont = false;
1234         } else if (parser->idx < parser->size - 1) {
1235                 parser->cont = true;
1236                 parser->buffer[parser->idx++] = ch;
1237         } else {
1238                 ret = -EINVAL;
1239                 goto out;
1240         }
1241
1242         *ppos += read;
1243         ret = read;
1244
1245 out:
1246         return ret;
1247 }
1248
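/*
 * Example of the parsing behaviour above (illustrative): a write of
 * "123 456\n" is consumed over successive calls; one call fills
 * parser->buffer with "123" and returns, the next with "456", and a token
 * split across two writes is carried over via parser->cont.
 */
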
1249 /* TODO add a seq_buf_to_buffer() */
1250 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1251 {
1252         int len;
1253
1254         if (trace_seq_used(s) <= s->seq.readpos)
1255                 return -EBUSY;
1256
1257         len = trace_seq_used(s) - s->seq.readpos;
1258         if (cnt > len)
1259                 cnt = len;
1260         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1261
1262         s->seq.readpos += cnt;
1263         return cnt;
1264 }
1265
1266 unsigned long __read_mostly     tracing_thresh;
1267
1268 #ifdef CONFIG_TRACER_MAX_TRACE
1269 /*
1270  * Copy the new maximum trace into the separate maximum-trace
1271  * structure. (this way the maximum trace is permanently saved,
1272  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1273  */
1274 static void
1275 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1276 {
1277         struct trace_buffer *trace_buf = &tr->trace_buffer;
1278         struct trace_buffer *max_buf = &tr->max_buffer;
1279         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1280         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1281
1282         max_buf->cpu = cpu;
1283         max_buf->time_start = data->preempt_timestamp;
1284
1285         max_data->saved_latency = tr->max_latency;
1286         max_data->critical_start = data->critical_start;
1287         max_data->critical_end = data->critical_end;
1288
1289         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1290         max_data->pid = tsk->pid;
1291         /*
1292          * If tsk == current, then use current_uid(), as that does not use
1293          * RCU. The irq tracer can be called out of RCU scope.
1294          */
1295         if (tsk == current)
1296                 max_data->uid = current_uid();
1297         else
1298                 max_data->uid = task_uid(tsk);
1299
1300         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1301         max_data->policy = tsk->policy;
1302         max_data->rt_priority = tsk->rt_priority;
1303
1304         /* record this task's comm */
1305         tracing_record_cmdline(tsk);
1306 }
1307
1308 /**
1309  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1310  * @tr: tracer
1311  * @tsk: the task with the latency
1312  * @cpu: The cpu that initiated the trace.
1313  *
1314  * Flip the buffers between the @tr and the max_tr and record information
1315  * about which task was the cause of this latency.
1316  */
1317 void
1318 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct ring_buffer *buf;
1321
1322         if (tr->stop_count)
1323                 return;
1324
1325         WARN_ON_ONCE(!irqs_disabled());
1326
1327         if (!tr->allocated_snapshot) {
1328                 /* Only the nop tracer should hit this when disabling */
1329                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1330                 return;
1331         }
1332
1333         arch_spin_lock(&tr->max_lock);
1334
1335         /* Inherit the recordable setting from trace_buffer */
1336         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1337                 ring_buffer_record_on(tr->max_buffer.buffer);
1338         else
1339                 ring_buffer_record_off(tr->max_buffer.buffer);
1340
1341         buf = tr->trace_buffer.buffer;
1342         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1343         tr->max_buffer.buffer = buf;
1344
1345         __update_max_tr(tr, tsk, cpu);
1346         arch_spin_unlock(&tr->max_lock);
1347 }
1348
1349 /**
1350  * update_max_tr_single - only copy one trace over, and reset the rest
1351  * @tr: tracer
1352  * @tsk: task with the latency
1353  * @cpu: the cpu of the buffer to copy.
1354  *
1355  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1356  */
1357 void
1358 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1359 {
1360         int ret;
1361
1362         if (tr->stop_count)
1363                 return;
1364
1365         WARN_ON_ONCE(!irqs_disabled());
1366         if (!tr->allocated_snapshot) {
1367                 /* Only the nop tracer should hit this when disabling */
1368                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1369                 return;
1370         }
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1375
1376         if (ret == -EBUSY) {
1377                 /*
1378                  * We failed to swap the buffer due to a commit taking
1379                  * place on this CPU. We fail to record, but we reset
1380                  * the max trace buffer (no one writes directly to it)
1381                  * and flag that it failed.
1382                  */
1383                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1384                         "Failed to swap buffers due to commit in progress\n");
1385         }
1386
1387         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1388
1389         __update_max_tr(tr, tsk, cpu);
1390         arch_spin_unlock(&tr->max_lock);
1391 }
1392 #endif /* CONFIG_TRACER_MAX_TRACE */
1393
1394 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1395 {
1396         /* Iterators are static, they should be filled or empty */
1397         if (trace_buffer_iter(iter, iter->cpu_file))
1398                 return 0;
1399
1400         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1401                                 full);
1402 }
1403
1404 #ifdef CONFIG_FTRACE_STARTUP_TEST
1405 static int run_tracer_selftest(struct tracer *type)
1406 {
1407         struct trace_array *tr = &global_trace;
1408         struct tracer *saved_tracer = tr->current_trace;
1409         int ret;
1410
1411         if (!type->selftest || tracing_selftest_disabled)
1412                 return 0;
1413
1414         /*
1415          * Run a selftest on this tracer.
1416          * Here we reset the trace buffer, and set the current
1417          * tracer to be this tracer. The tracer can then run some
1418          * internal tracing to verify that everything is in order.
1419          * If we fail, we do not register this tracer.
1420          */
1421         tracing_reset_online_cpus(&tr->trace_buffer);
1422
1423         tr->current_trace = type;
1424
1425 #ifdef CONFIG_TRACER_MAX_TRACE
1426         if (type->use_max_tr) {
1427                 /* If we expanded the buffers, make sure the max is expanded too */
1428                 if (ring_buffer_expanded)
1429                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1430                                            RING_BUFFER_ALL_CPUS);
1431                 tr->allocated_snapshot = true;
1432         }
1433 #endif
1434
1435         /* the test is responsible for initializing and enabling */
1436         pr_info("Testing tracer %s: ", type->name);
1437         ret = type->selftest(type, tr);
1438         /* the test is responsible for resetting too */
1439         tr->current_trace = saved_tracer;
1440         if (ret) {
1441                 printk(KERN_CONT "FAILED!\n");
1442                 /* Add the warning after printing 'FAILED' */
1443                 WARN_ON(1);
1444                 return -1;
1445         }
1446         /* Only reset on passing, to avoid touching corrupted buffers */
1447         tracing_reset_online_cpus(&tr->trace_buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         if (type->use_max_tr) {
1451                 tr->allocated_snapshot = false;
1452
1453                 /* Shrink the max buffer again */
1454                 if (ring_buffer_expanded)
1455                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1456                                            RING_BUFFER_ALL_CPUS);
1457         }
1458 #endif
1459
1460         printk(KERN_CONT "PASSED\n");
1461         return 0;
1462 }
1463 #else
1464 static inline int run_tracer_selftest(struct tracer *type)
1465 {
1466         return 0;
1467 }
1468 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1469
1470 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1471
1472 static void __init apply_trace_boot_options(void);
1473
1474 /**
1475  * register_tracer - register a tracer with the ftrace system.
1476  * @type - the plugin for the tracer
1477  *
1478  * Register a new plugin tracer.
1479  */
1480 int __init register_tracer(struct tracer *type)
1481 {
1482         struct tracer *t;
1483         int ret = 0;
1484
1485         if (!type->name) {
1486                 pr_info("Tracer must have a name\n");
1487                 return -1;
1488         }
1489
1490         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1491                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1492                 return -1;
1493         }
1494
1495         mutex_lock(&trace_types_lock);
1496
1497         tracing_selftest_running = true;
1498
1499         for (t = trace_types; t; t = t->next) {
1500                 if (strcmp(type->name, t->name) == 0) {
1501                         /* already found */
1502                         pr_info("Tracer %s already registered\n",
1503                                 type->name);
1504                         ret = -1;
1505                         goto out;
1506                 }
1507         }
1508
1509         if (!type->set_flag)
1510                 type->set_flag = &dummy_set_flag;
1511         if (!type->flags) {
1512                 /* allocate a dummy tracer_flags */
1513                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1514                 if (!type->flags) {
1515                         ret = -ENOMEM;
1516                         goto out;
1517                 }
1518                 type->flags->val = 0;
1519                 type->flags->opts = dummy_tracer_opt;
1520         } else
1521                 if (!type->flags->opts)
1522                         type->flags->opts = dummy_tracer_opt;
1523
1524         /* store the tracer for __set_tracer_option */
1525         type->flags->trace = type;
1526
1527         ret = run_tracer_selftest(type);
1528         if (ret < 0)
1529                 goto out;
1530
1531         type->next = trace_types;
1532         trace_types = type;
1533         add_tracer_options(&global_trace, type);
1534
1535  out:
1536         tracing_selftest_running = false;
1537         mutex_unlock(&trace_types_lock);
1538
1539         if (ret || !default_bootup_tracer)
1540                 goto out_unlock;
1541
1542         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1543                 goto out_unlock;
1544
1545         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1546         /* Do we want this tracer to start on bootup? */
1547         tracing_set_tracer(&global_trace, type->name);
1548         default_bootup_tracer = NULL;
1549
1550         apply_trace_boot_options();
1551
1552         /* disable other selftests, since this will break them. */
1553         tracing_selftest_disabled = true;
1554 #ifdef CONFIG_FTRACE_STARTUP_TEST
1555         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1556                type->name);
1557 #endif
1558
1559  out_unlock:
1560         return ret;
1561 }
1562
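/*
 * Sketch of how an in-kernel tracer registers itself (illustrative only;
 * my_tracer, my_tracer_init and my_tracer_reset are placeholders, and the
 * field set is a minimal one assumed from struct tracer in trace.h):
 *
 *         static struct tracer my_tracer __read_mostly = {
 *                 .name   = "mytracer",
 *                 .init   = my_tracer_init,
 *                 .reset  = my_tracer_reset,
 *         };
 *
 *         static __init int init_my_tracer(void)
 *         {
 *                 return register_tracer(&my_tracer);
 *         }
 *         core_initcall(init_my_tracer);
 */
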
1563 void tracing_reset(struct trace_buffer *buf, int cpu)
1564 {
1565         struct ring_buffer *buffer = buf->buffer;
1566
1567         if (!buffer)
1568                 return;
1569
1570         ring_buffer_record_disable(buffer);
1571
1572         /* Make sure all commits have finished */
1573         synchronize_sched();
1574         ring_buffer_reset_cpu(buffer, cpu);
1575
1576         ring_buffer_record_enable(buffer);
1577 }
1578
1579 void tracing_reset_online_cpus(struct trace_buffer *buf)
1580 {
1581         struct ring_buffer *buffer = buf->buffer;
1582         int cpu;
1583
1584         if (!buffer)
1585                 return;
1586
1587         ring_buffer_record_disable(buffer);
1588
1589         /* Make sure all commits have finished */
1590         synchronize_sched();
1591
1592         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1593
1594         for_each_online_cpu(cpu)
1595                 ring_buffer_reset_cpu(buffer, cpu);
1596
1597         ring_buffer_record_enable(buffer);
1598 }
1599
1600 /* Must have trace_types_lock held */
1601 void tracing_reset_all_online_cpus(void)
1602 {
1603         struct trace_array *tr;
1604
1605         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1606                 tracing_reset_online_cpus(&tr->trace_buffer);
1607 #ifdef CONFIG_TRACER_MAX_TRACE
1608                 tracing_reset_online_cpus(&tr->max_buffer);
1609 #endif
1610         }
1611 }
1612
1613 #define SAVED_CMDLINES_DEFAULT 128
1614 #define NO_CMDLINE_MAP UINT_MAX
1615 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1616 struct saved_cmdlines_buffer {
1617         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1618         unsigned *map_cmdline_to_pid;
1619         unsigned cmdline_num;
1620         int cmdline_idx;
1621         char *saved_cmdlines;
1622 };
1623 static struct saved_cmdlines_buffer *savedcmd;
1624
1625 static inline char *get_saved_cmdlines(int idx)
1626 {
1627         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1628 }
1629
1630 static inline void set_cmdline(int idx, const char *cmdline)
1631 {
1632         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1633 }
1634
1635 static int allocate_cmdlines_buffer(unsigned int val,
1636                                     struct saved_cmdlines_buffer *s)
1637 {
1638         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1639                                         GFP_KERNEL);
1640         if (!s->map_cmdline_to_pid)
1641                 return -ENOMEM;
1642
1643         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1644         if (!s->saved_cmdlines) {
1645                 kfree(s->map_cmdline_to_pid);
1646                 return -ENOMEM;
1647         }
1648
1649         s->cmdline_idx = 0;
1650         s->cmdline_num = val;
1651         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1652                sizeof(s->map_pid_to_cmdline));
1653         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1654                val * sizeof(*s->map_cmdline_to_pid));
1655
1656         return 0;
1657 }
1658
1659 static int trace_create_savedcmd(void)
1660 {
1661         int ret;
1662
1663         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1664         if (!savedcmd)
1665                 return -ENOMEM;
1666
1667         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1668         if (ret < 0) {
1669                 kfree(savedcmd);
1670                 savedcmd = NULL;
1671                 return -ENOMEM;
1672         }
1673
1674         return 0;
1675 }
1676
1677 int is_tracing_stopped(void)
1678 {
1679         return global_trace.stop_count;
1680 }
1681
1682 /**
1683  * tracing_start - quick start of the tracer
1684  *
1685  * If tracing is enabled but was stopped by tracing_stop,
1686  * this will start the tracer back up.
1687  */
1688 void tracing_start(void)
1689 {
1690         struct ring_buffer *buffer;
1691         unsigned long flags;
1692
1693         if (tracing_disabled)
1694                 return;
1695
1696         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1697         if (--global_trace.stop_count) {
1698                 if (global_trace.stop_count < 0) {
1699                         /* Someone screwed up their debugging */
1700                         WARN_ON_ONCE(1);
1701                         global_trace.stop_count = 0;
1702                 }
1703                 goto out;
1704         }
1705
1706         /* Prevent the buffers from switching */
1707         arch_spin_lock(&global_trace.max_lock);
1708
1709         buffer = global_trace.trace_buffer.buffer;
1710         if (buffer)
1711                 ring_buffer_record_enable(buffer);
1712
1713 #ifdef CONFIG_TRACER_MAX_TRACE
1714         buffer = global_trace.max_buffer.buffer;
1715         if (buffer)
1716                 ring_buffer_record_enable(buffer);
1717 #endif
1718
1719         arch_spin_unlock(&global_trace.max_lock);
1720
1721  out:
1722         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1723 }
1724
1725 static void tracing_start_tr(struct trace_array *tr)
1726 {
1727         struct ring_buffer *buffer;
1728         unsigned long flags;
1729
1730         if (tracing_disabled)
1731                 return;
1732
1733         /* If global, we need to also start the max tracer */
1734         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1735                 return tracing_start();
1736
1737         raw_spin_lock_irqsave(&tr->start_lock, flags);
1738
1739         if (--tr->stop_count) {
1740                 if (tr->stop_count < 0) {
1741                         /* Someone screwed up their debugging */
1742                         WARN_ON_ONCE(1);
1743                         tr->stop_count = 0;
1744                 }
1745                 goto out;
1746         }
1747
1748         buffer = tr->trace_buffer.buffer;
1749         if (buffer)
1750                 ring_buffer_record_enable(buffer);
1751
1752  out:
1753         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1754 }
1755
1756 /**
1757  * tracing_stop - quick stop of the tracer
1758  *
1759  * Light weight way to stop tracing. Use in conjunction with
1760  * tracing_start.
1761  */
1762 void tracing_stop(void)
1763 {
1764         struct ring_buffer *buffer;
1765         unsigned long flags;
1766
1767         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1768         if (global_trace.stop_count++)
1769                 goto out;
1770
1771         /* Prevent the buffers from switching */
1772         arch_spin_lock(&global_trace.max_lock);
1773
1774         buffer = global_trace.trace_buffer.buffer;
1775         if (buffer)
1776                 ring_buffer_record_disable(buffer);
1777
1778 #ifdef CONFIG_TRACER_MAX_TRACE
1779         buffer = global_trace.max_buffer.buffer;
1780         if (buffer)
1781                 ring_buffer_record_disable(buffer);
1782 #endif
1783
1784         arch_spin_unlock(&global_trace.max_lock);
1785
1786  out:
1787         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1788 }
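/*
 * Example (illustrative sketch only, not part of this file): a debugging
 * helper might bracket a suspect region with tracing_stop()/tracing_start()
 * so the ring buffer contents leading up to that region are preserved while
 * it runs.  The function name and callback below are hypothetical.
 *
 *	static void example_freeze_trace_around(void (*suspect)(void))
 *	{
 *		tracing_stop();
 *		suspect();
 *		tracing_start();
 *	}
 *
 * The pair is reference counted via stop_count, so nested users are safe
 * as long as the calls stay balanced.
 */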
1789
1790 static void tracing_stop_tr(struct trace_array *tr)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         /* If global, we need to also stop the max tracer */
1796         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1797                 return tracing_stop();
1798
1799         raw_spin_lock_irqsave(&tr->start_lock, flags);
1800         if (tr->stop_count++)
1801                 goto out;
1802
1803         buffer = tr->trace_buffer.buffer;
1804         if (buffer)
1805                 ring_buffer_record_disable(buffer);
1806
1807  out:
1808         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1809 }
1810
1811 void trace_stop_cmdline_recording(void);
1812
1813 static int trace_save_cmdline(struct task_struct *tsk)
1814 {
1815         unsigned tpid, idx;
1816
1817         /* treat recording of idle task as a success */
1818         if (!tsk->pid)
1819                 return 1;
1820
1821         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1822
1823         /*
1824          * It's not the end of the world if we don't get
1825          * the lock, but we also don't want to spin
1826          * nor do we want to disable interrupts,
1827          * so if we miss here, then better luck next time.
1828          */
1829         if (!arch_spin_trylock(&trace_cmdline_lock))
1830                 return 0;
1831
1832         idx = savedcmd->map_pid_to_cmdline[tpid];
1833         if (idx == NO_CMDLINE_MAP) {
1834                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1835
1836                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1837                 savedcmd->cmdline_idx = idx;
1838         }
1839
1840         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1841         set_cmdline(idx, tsk->comm);
1842
1843         arch_spin_unlock(&trace_cmdline_lock);
1844
1845         return 1;
1846 }
1847
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850         unsigned map;
1851         int tpid;
1852
1853         if (!pid) {
1854                 strcpy(comm, "<idle>");
1855                 return;
1856         }
1857
1858         if (WARN_ON_ONCE(pid < 0)) {
1859                 strcpy(comm, "<XXX>");
1860                 return;
1861         }
1862
1863         tpid = pid & (PID_MAX_DEFAULT - 1);
1864         map = savedcmd->map_pid_to_cmdline[tpid];
1865         if (map != NO_CMDLINE_MAP) {
1866                 tpid = savedcmd->map_cmdline_to_pid[map];
1867                 if (tpid == pid) {
1868                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1869                         return;
1870                 }
1871         }
1872         strcpy(comm, "<...>");
1873 }
1874
1875 void trace_find_cmdline(int pid, char comm[])
1876 {
1877         preempt_disable();
1878         arch_spin_lock(&trace_cmdline_lock);
1879
1880         __trace_find_cmdline(pid, comm);
1881
1882         arch_spin_unlock(&trace_cmdline_lock);
1883         preempt_enable();
1884 }
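/*
 * Example (illustrative sketch only): resolving a recorded pid back to a
 * command name, the way the output code does.  TASK_COMM_LEN comes from
 * <linux/sched.h>; "entry" here is assumed to be a trace entry obtained
 * elsewhere.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	pr_debug("pid %d last ran as %s\n", entry->pid, comm);
 *
 * If the pid slot in the cmdline cache was reused by another task, the
 * lookup falls back to "<...>" rather than reporting a stale name.
 */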
1885
1886 void tracing_record_cmdline(struct task_struct *tsk)
1887 {
1888         if (!__this_cpu_read(trace_cmdline_save))
1889                 return;
1890
1891         if (trace_save_cmdline(tsk))
1892                 __this_cpu_write(trace_cmdline_save, false);
1893 }
1894
1895 void
1896 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1897                              int pc)
1898 {
1899         struct task_struct *tsk = current;
1900
1901         entry->preempt_count            = pc & 0xff;
1902         entry->pid                      = (tsk) ? tsk->pid : 0;
1903         entry->flags =
1904 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1905                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1906 #else
1907                 TRACE_FLAG_IRQS_NOSUPPORT |
1908 #endif
1909                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1910                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1911                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1912                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1913                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1914 }
1915 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1916
1917 static __always_inline void
1918 trace_event_setup(struct ring_buffer_event *event,
1919                   int type, unsigned long flags, int pc)
1920 {
1921         struct trace_entry *ent = ring_buffer_event_data(event);
1922
1923         tracing_generic_entry_update(ent, flags, pc);
1924         ent->type = type;
1925 }
1926
1927 struct ring_buffer_event *
1928 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1929                           int type,
1930                           unsigned long len,
1931                           unsigned long flags, int pc)
1932 {
1933         struct ring_buffer_event *event;
1934
1935         event = ring_buffer_lock_reserve(buffer, len);
1936         if (event != NULL)
1937                 trace_event_setup(event, type, flags, pc);
1938
1939         return event;
1940 }
1941
1942 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1943 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1944 static int trace_buffered_event_ref;
1945
1946 /**
1947  * trace_buffered_event_enable - enable buffering events
1948  *
1949  * When events are being filtered, it is quicker to write the event
1950  * data into a temporary buffer when there is a good chance that the
1951  * event will not be committed. Discarding a reserved event from the
1952  * ring buffer is not as fast as committing, and is much slower than
1953  * copying the data and committing it in one go.
1954  *
1955  * When an event is to be filtered, per-cpu buffers are allocated to
1956  * write the event data into. If the event is filtered out it is
1957  * simply dropped; otherwise the entire data is committed
1958  * in one shot.
1959  */
1960 void trace_buffered_event_enable(void)
1961 {
1962         struct ring_buffer_event *event;
1963         struct page *page;
1964         int cpu;
1965
1966         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1967
1968         if (trace_buffered_event_ref++)
1969                 return;
1970
1971         for_each_tracing_cpu(cpu) {
1972                 page = alloc_pages_node(cpu_to_node(cpu),
1973                                         GFP_KERNEL | __GFP_NORETRY, 0);
1974                 if (!page)
1975                         goto failed;
1976
1977                 event = page_address(page);
1978                 memset(event, 0, sizeof(*event));
1979
1980                 per_cpu(trace_buffered_event, cpu) = event;
1981
1982                 preempt_disable();
1983                 if (cpu == smp_processor_id() &&
1984                     this_cpu_read(trace_buffered_event) !=
1985                     per_cpu(trace_buffered_event, cpu))
1986                         WARN_ON_ONCE(1);
1987                 preempt_enable();
1988         }
1989
1990         return;
1991  failed:
1992         trace_buffered_event_disable();
1993 }
1994
1995 static void enable_trace_buffered_event(void *data)
1996 {
1997         /* Probably not needed, but do it anyway */
1998         smp_rmb();
1999         this_cpu_dec(trace_buffered_event_cnt);
2000 }
2001
2002 static void disable_trace_buffered_event(void *data)
2003 {
2004         this_cpu_inc(trace_buffered_event_cnt);
2005 }
2006
2007 /**
2008  * trace_buffered_event_disable - disable buffering events
2009  *
2010  * When a filter is removed, it is faster to not use the buffered
2011  * events, and to commit directly into the ring buffer. Free up
2012  * the temp buffers when there are no more users. This requires
2013  * special synchronization with current events.
2014  */
2015 void trace_buffered_event_disable(void)
2016 {
2017         int cpu;
2018
2019         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2020
2021         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2022                 return;
2023
2024         if (--trace_buffered_event_ref)
2025                 return;
2026
2027         preempt_disable();
2028         /* For each CPU, set the buffer as used. */
2029         smp_call_function_many(tracing_buffer_mask,
2030                                disable_trace_buffered_event, NULL, 1);
2031         preempt_enable();
2032
2033         /* Wait for all current users to finish */
2034         synchronize_sched();
2035
2036         for_each_tracing_cpu(cpu) {
2037                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2038                 per_cpu(trace_buffered_event, cpu) = NULL;
2039         }
2040         /*
2041          * Make sure trace_buffered_event is NULL before clearing
2042          * trace_buffered_event_cnt.
2043          */
2044         smp_wmb();
2045
2046         preempt_disable();
2047         /* Do the work on each cpu */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                enable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051 }
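/*
 * Example (illustrative sketch only): the enable/disable pair is reference
 * counted and, as the WARN_ON_ONCE checks above show, must be called with
 * event_mutex held.  The filter code uses it roughly like this:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(filter attached)
 *	...
 *	trace_buffered_event_disable();		(filter removed)
 *	mutex_unlock(&event_mutex);
 */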
2052
2053 void
2054 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2055 {
2056         __this_cpu_write(trace_cmdline_save, true);
2057
2058         /* If this is the temp buffer, we need to commit fully */
2059         if (this_cpu_read(trace_buffered_event) == event) {
2060                 /* Length is in event->array[0] */
2061                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2062                 /* Release the temp buffer */
2063                 this_cpu_dec(trace_buffered_event_cnt);
2064         } else
2065                 ring_buffer_unlock_commit(buffer, event);
2066 }
2067
2068 static struct ring_buffer *temp_buffer;
2069
2070 struct ring_buffer_event *
2071 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2072                           struct trace_event_file *trace_file,
2073                           int type, unsigned long len,
2074                           unsigned long flags, int pc)
2075 {
2076         struct ring_buffer_event *entry;
2077         int val;
2078
2079         *current_rb = trace_file->tr->trace_buffer.buffer;
2080
2081         if ((trace_file->flags &
2082              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2083             (entry = this_cpu_read(trace_buffered_event))) {
2084                 /* Try to use the per cpu buffer first */
2085                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2086                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2087                         trace_event_setup(entry, type, flags, pc);
2088                         entry->array[0] = len;
2089                         return entry;
2090                 }
2091                 this_cpu_dec(trace_buffered_event_cnt);
2092         }
2093
2094         entry = trace_buffer_lock_reserve(*current_rb,
2095                                          type, len, flags, pc);
2096         /*
2097          * If tracing is off, but we have triggers enabled
2098          * we still need to look at the event data. Use the temp_buffer
2099          * to store the trace event for the tigger to use. It's recusive
2100          * safe and will not be recorded anywhere.
2101          */
2102         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2103                 *current_rb = temp_buffer;
2104                 entry = trace_buffer_lock_reserve(*current_rb,
2105                                                   type, len, flags, pc);
2106         }
2107         return entry;
2108 }
2109 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2110
2111 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2112                                      struct ring_buffer *buffer,
2113                                      struct ring_buffer_event *event,
2114                                      unsigned long flags, int pc,
2115                                      struct pt_regs *regs)
2116 {
2117         __buffer_unlock_commit(buffer, event);
2118
2119         /*
2120          * If regs is not set, then skip the following callers:
2121          *   trace_buffer_unlock_commit_regs
2122          *   event_trigger_unlock_commit
2123          *   trace_event_buffer_commit
2124          *   trace_event_raw_event_sched_switch
2125          * Note, we can still get here via blktrace, wakeup tracer
2126          * and mmiotrace, but that's ok if they lose a function or
2127          * two. They are not that meaningful.
2128          */
2129         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2130         ftrace_trace_userstack(tr, buffer, flags, pc);
2131 }
2132
2133 void
2134 trace_function(struct trace_array *tr,
2135                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2136                int pc)
2137 {
2138         struct trace_event_call *call = &event_function;
2139         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2140         struct ring_buffer_event *event;
2141         struct ftrace_entry *entry;
2142
2143         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2144                                           flags, pc);
2145         if (!event)
2146                 return;
2147         entry   = ring_buffer_event_data(event);
2148         entry->ip                       = ip;
2149         entry->parent_ip                = parent_ip;
2150
2151         if (!call_filter_check_discard(call, entry, buffer, event))
2152                 __buffer_unlock_commit(buffer, event);
2153 }
2154
2155 #ifdef CONFIG_STACKTRACE
2156
2157 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2158 struct ftrace_stack {
2159         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2160 };
2161
2162 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2163 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2164
2165 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2166                                  unsigned long flags,
2167                                  int skip, int pc, struct pt_regs *regs)
2168 {
2169         struct trace_event_call *call = &event_kernel_stack;
2170         struct ring_buffer_event *event;
2171         struct stack_entry *entry;
2172         struct stack_trace trace;
2173         int use_stack;
2174         int size = FTRACE_STACK_ENTRIES;
2175
2176         trace.nr_entries        = 0;
2177         trace.skip              = skip;
2178
2179         /*
2180          * Add two, for this function and the call to save_stack_trace()
2181          * If regs is set, then these functions will not be in the way.
2182          */
2183         if (!regs)
2184                 trace.skip += 2;
2185
2186         /*
2187          * Since events can happen in NMIs there's no safe way to
2188          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2189          * or NMI comes in, it will just have to use the default
2190          * FTRACE_STACK_ENTRIES.
2191          */
2192         preempt_disable_notrace();
2193
2194         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2195         /*
2196          * We don't need any atomic variables, just a barrier.
2197          * If an interrupt comes in, we don't care, because it would
2198          * have exited and put the counter back to what we want.
2199          * We just need a barrier to keep gcc from moving things
2200          * around.
2201          */
2202         barrier();
2203         if (use_stack == 1) {
2204                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2205                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2206
2207                 if (regs)
2208                         save_stack_trace_regs(regs, &trace);
2209                 else
2210                         save_stack_trace(&trace);
2211
2212                 if (trace.nr_entries > size)
2213                         size = trace.nr_entries;
2214         } else
2215                 /* From now on, use_stack is a boolean */
2216                 use_stack = 0;
2217
2218         size *= sizeof(unsigned long);
2219
2220         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2221                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2222                                     flags, pc);
2223         if (!event)
2224                 goto out;
2225         entry = ring_buffer_event_data(event);
2226
2227         memset(&entry->caller, 0, size);
2228
2229         if (use_stack)
2230                 memcpy(&entry->caller, trace.entries,
2231                        trace.nr_entries * sizeof(unsigned long));
2232         else {
2233                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2234                 trace.entries           = entry->caller;
2235                 if (regs)
2236                         save_stack_trace_regs(regs, &trace);
2237                 else
2238                         save_stack_trace(&trace);
2239         }
2240
2241         entry->size = trace.nr_entries;
2242
2243         if (!call_filter_check_discard(call, entry, buffer, event))
2244                 __buffer_unlock_commit(buffer, event);
2245
2246  out:
2247         /* Again, don't let gcc optimize things here */
2248         barrier();
2249         __this_cpu_dec(ftrace_stack_reserve);
2250         preempt_enable_notrace();
2251
2252 }
2253
2254 static inline void ftrace_trace_stack(struct trace_array *tr,
2255                                       struct ring_buffer *buffer,
2256                                       unsigned long flags,
2257                                       int skip, int pc, struct pt_regs *regs)
2258 {
2259         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2260                 return;
2261
2262         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2263 }
2264
2265 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2266                    int pc)
2267 {
2268         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2269 }
2270
2271 /**
2272  * trace_dump_stack - record a stack back trace in the trace buffer
2273  * @skip: Number of functions to skip (helper handlers)
2274  */
2275 void trace_dump_stack(int skip)
2276 {
2277         unsigned long flags;
2278
2279         if (tracing_disabled || tracing_selftest_running)
2280                 return;
2281
2282         local_save_flags(flags);
2283
2284         /*
2285          * Skip 3 more, which seems to get us to the caller of
2286          * this function.
2287          */
2288         skip += 3;
2289         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2290                              flags, skip, preempt_count(), NULL);
2291 }
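/*
 * Example (illustrative sketch only): dropping a stack dump into the trace
 * buffer from a suspect code path.  Passing 0 records the caller's stack;
 * a positive skip drops that many additional helper frames from the top.
 * "bad_condition" is a placeholder for whatever the caller is checking.
 *
 *	if (unlikely(bad_condition))
 *		trace_dump_stack(0);
 */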
2292
2293 static DEFINE_PER_CPU(int, user_stack_count);
2294
2295 void
2296 ftrace_trace_userstack(struct trace_array *tr,
2297                        struct ring_buffer *buffer, unsigned long flags, int pc)
2298 {
2299         struct trace_event_call *call = &event_user_stack;
2300         struct ring_buffer_event *event;
2301         struct userstack_entry *entry;
2302         struct stack_trace trace;
2303
2304         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2305                 return;
2306
2307         /*
2308          * NMIs cannot handle page faults, even with fixups.
2309          * Saving the user stack can (and often does) fault.
2310          */
2311         if (unlikely(in_nmi()))
2312                 return;
2313
2314         /*
2315          * prevent recursion, since the user stack tracing may
2316          * trigger other kernel events.
2317          */
2318         preempt_disable();
2319         if (__this_cpu_read(user_stack_count))
2320                 goto out;
2321
2322         __this_cpu_inc(user_stack_count);
2323
2324         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2325                                           sizeof(*entry), flags, pc);
2326         if (!event)
2327                 goto out_drop_count;
2328         entry   = ring_buffer_event_data(event);
2329
2330         entry->tgid             = current->tgid;
2331         memset(&entry->caller, 0, sizeof(entry->caller));
2332
2333         trace.nr_entries        = 0;
2334         trace.max_entries       = FTRACE_STACK_ENTRIES;
2335         trace.skip              = 0;
2336         trace.entries           = entry->caller;
2337
2338         save_stack_trace_user(&trace);
2339         if (!call_filter_check_discard(call, entry, buffer, event))
2340                 __buffer_unlock_commit(buffer, event);
2341
2342  out_drop_count:
2343         __this_cpu_dec(user_stack_count);
2344  out:
2345         preempt_enable();
2346 }
2347
2348 #ifdef UNUSED
2349 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2350 {
2351         ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
2352 }
2353 #endif /* UNUSED */
2354
2355 #endif /* CONFIG_STACKTRACE */
2356
2357 /* created for use with alloc_percpu */
2358 struct trace_buffer_struct {
2359         int nesting;
2360         char buffer[4][TRACE_BUF_SIZE];
2361 };
2362
2363 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2364
2365 /*
2366  * This allows for lockless recording.  If we're nested too deeply, then
2367  * this returns NULL.
2368  */
2369 static char *get_trace_buf(void)
2370 {
2371         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2372
2373         if (!trace_percpu_buffer || buffer->nesting >= 4)
2374                 return NULL;
2375
2376         buffer->nesting++;
2377
2378         /* Interrupts must see nesting incremented before we use the buffer */
2379         barrier();
2380         return &buffer->buffer[buffer->nesting - 1][0];
2381 }
2382
2383 static void put_trace_buf(void)
2384 {
2385         /* Don't let the decrement of nesting leak before this */
2386         barrier();
2387         this_cpu_dec(trace_percpu_buffer->nesting);
2388 }
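/*
 * Example (illustrative sketch only): the get/put pair as the trace_printk()
 * paths below use it.  The buffer is per-cpu and only four levels deep, so a
 * failed get must simply be treated as "no buffer available".
 *
 *	char *tbuf;
 *
 *	preempt_disable_notrace();
 *	tbuf = get_trace_buf();
 *	if (tbuf) {
 *		(format at most TRACE_BUF_SIZE bytes into tbuf)
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */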
2389
2390 static int alloc_percpu_trace_buffer(void)
2391 {
2392         struct trace_buffer_struct __percpu *buffers;
2393
2394         buffers = alloc_percpu(struct trace_buffer_struct);
2395         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2396                 return -ENOMEM;
2397
2398         trace_percpu_buffer = buffers;
2399         return 0;
2400 }
2401
2402 static int buffers_allocated;
2403
2404 void trace_printk_init_buffers(void)
2405 {
2406         if (buffers_allocated)
2407                 return;
2408
2409         if (alloc_percpu_trace_buffer())
2410                 return;
2411
2412         /* trace_printk() is for debug use only. Don't use it in production. */
2413
2414         pr_warn("\n");
2415         pr_warn("**********************************************************\n");
2416         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417         pr_warn("**                                                      **\n");
2418         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2419         pr_warn("**                                                      **\n");
2420         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2421         pr_warn("** unsafe for production use.                           **\n");
2422         pr_warn("**                                                      **\n");
2423         pr_warn("** If you see this message and you are not debugging    **\n");
2424         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2425         pr_warn("**                                                      **\n");
2426         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2427         pr_warn("**********************************************************\n");
2428
2429         /* Expand the buffers to set size */
2430         tracing_update_buffers();
2431
2432         buffers_allocated = 1;
2433
2434         /*
2435          * trace_printk_init_buffers() can be called by modules.
2436          * If that happens, then we need to start cmdline recording
2437          * directly here. If the global_trace.buffer is already
2438          * allocated here, then this was called by module code.
2439          */
2440         if (global_trace.trace_buffer.buffer)
2441                 tracing_start_cmdline_record();
2442 }
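/*
 * Example (illustrative sketch only): any use of the trace_printk() macro
 * from <linux/kernel.h> arranges for this initialization to run, which is
 * what produces the banner above.  The arguments here are placeholders.
 *
 *	trace_printk("widget %d entered state %d\n", id, state);
 *
 * Note that trace_printk() writes into the ring buffer, not to the console.
 */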
2443
2444 void trace_printk_start_comm(void)
2445 {
2446         /* Start tracing comms if trace printk is set */
2447         if (!buffers_allocated)
2448                 return;
2449         tracing_start_cmdline_record();
2450 }
2451
2452 static void trace_printk_start_stop_comm(int enabled)
2453 {
2454         if (!buffers_allocated)
2455                 return;
2456
2457         if (enabled)
2458                 tracing_start_cmdline_record();
2459         else
2460                 tracing_stop_cmdline_record();
2461 }
2462
2463 /**
2464  * trace_vbprintk - write binary msg to tracing buffer
2465  * @ip: The address of the caller
 * @fmt: The format string to write into the buffer
 * @args: Arguments for @fmt
2466  */
2467 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2468 {
2469         struct trace_event_call *call = &event_bprint;
2470         struct ring_buffer_event *event;
2471         struct ring_buffer *buffer;
2472         struct trace_array *tr = &global_trace;
2473         struct bprint_entry *entry;
2474         unsigned long flags;
2475         char *tbuffer;
2476         int len = 0, size, pc;
2477
2478         if (unlikely(tracing_selftest_running || tracing_disabled))
2479                 return 0;
2480
2481         /* Don't pollute graph traces with trace_vprintk internals */
2482         pause_graph_tracing();
2483
2484         pc = preempt_count();
2485         preempt_disable_notrace();
2486
2487         tbuffer = get_trace_buf();
2488         if (!tbuffer) {
2489                 len = 0;
2490                 goto out_nobuffer;
2491         }
2492
2493         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2494
2495         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2496                 goto out;
2497
2498         local_save_flags(flags);
2499         size = sizeof(*entry) + sizeof(u32) * len;
2500         buffer = tr->trace_buffer.buffer;
2501         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2502                                           flags, pc);
2503         if (!event)
2504                 goto out;
2505         entry = ring_buffer_event_data(event);
2506         entry->ip                       = ip;
2507         entry->fmt                      = fmt;
2508
2509         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2510         if (!call_filter_check_discard(call, entry, buffer, event)) {
2511                 __buffer_unlock_commit(buffer, event);
2512                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2513         }
2514
2515 out:
2516         put_trace_buf();
2517
2518 out_nobuffer:
2519         preempt_enable_notrace();
2520         unpause_graph_tracing();
2521
2522         return len;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_vbprintk);
2525
2526 __printf(3, 0)
2527 static int
2528 __trace_array_vprintk(struct ring_buffer *buffer,
2529                       unsigned long ip, const char *fmt, va_list args)
2530 {
2531         struct trace_event_call *call = &event_print;
2532         struct ring_buffer_event *event;
2533         int len = 0, size, pc;
2534         struct print_entry *entry;
2535         unsigned long flags;
2536         char *tbuffer;
2537
2538         if (tracing_disabled || tracing_selftest_running)
2539                 return 0;
2540
2541         /* Don't pollute graph traces with trace_vprintk internals */
2542         pause_graph_tracing();
2543
2544         pc = preempt_count();
2545         preempt_disable_notrace();
2546
2547
2548         tbuffer = get_trace_buf();
2549         if (!tbuffer) {
2550                 len = 0;
2551                 goto out_nobuffer;
2552         }
2553
2554         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2555
2556         local_save_flags(flags);
2557         size = sizeof(*entry) + len + 1;
2558         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2559                                           flags, pc);
2560         if (!event)
2561                 goto out;
2562         entry = ring_buffer_event_data(event);
2563         entry->ip = ip;
2564
2565         memcpy(&entry->buf, tbuffer, len + 1);
2566         if (!call_filter_check_discard(call, entry, buffer, event)) {
2567                 __buffer_unlock_commit(buffer, event);
2568                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2569         }
2570
2571 out:
2572         put_trace_buf();
2573
2574 out_nobuffer:
2575         preempt_enable_notrace();
2576         unpause_graph_tracing();
2577
2578         return len;
2579 }
2580
2581 __printf(3, 0)
2582 int trace_array_vprintk(struct trace_array *tr,
2583                         unsigned long ip, const char *fmt, va_list args)
2584 {
2585         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2586 }
2587
2588 __printf(3, 0)
2589 int trace_array_printk(struct trace_array *tr,
2590                        unsigned long ip, const char *fmt, ...)
2591 {
2592         int ret;
2593         va_list ap;
2594
2595         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2596                 return 0;
2597
2598         if (!tr)
2599                 return -ENOENT;
2600
2601         va_start(ap, fmt);
2602         ret = trace_array_vprintk(tr, ip, fmt, ap);
2603         va_end(ap);
2604         return ret;
2605 }
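/*
 * Example (illustrative sketch only): writing into a specific trace instance
 * rather than the global buffer.  The trace_array pointer and "id" are
 * assumed to have been obtained elsewhere; for the global buffer,
 * trace_printk() is the usual interface.  _THIS_IP_ is from <linux/kernel.h>.
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 *
 * Returns the number of bytes written, 0 if the printk trace flag is off,
 * or -ENOENT if no trace array was supplied.
 */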
2606
2607 __printf(3, 4)
2608 int trace_array_printk_buf(struct ring_buffer *buffer,
2609                            unsigned long ip, const char *fmt, ...)
2610 {
2611         int ret;
2612         va_list ap;
2613
2614         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2615                 return 0;
2616
2617         va_start(ap, fmt);
2618         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2619         va_end(ap);
2620         return ret;
2621 }
2622
2623 __printf(2, 0)
2624 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2625 {
2626         return trace_array_vprintk(&global_trace, ip, fmt, args);
2627 }
2628 EXPORT_SYMBOL_GPL(trace_vprintk);
2629
2630 static void trace_iterator_increment(struct trace_iterator *iter)
2631 {
2632         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2633
2634         iter->idx++;
2635         if (buf_iter)
2636                 ring_buffer_read(buf_iter, NULL);
2637 }
2638
2639 static struct trace_entry *
2640 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2641                 unsigned long *lost_events)
2642 {
2643         struct ring_buffer_event *event;
2644         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2645
2646         if (buf_iter)
2647                 event = ring_buffer_iter_peek(buf_iter, ts);
2648         else
2649                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2650                                          lost_events);
2651
2652         if (event) {
2653                 iter->ent_size = ring_buffer_event_length(event);
2654                 return ring_buffer_event_data(event);
2655         }
2656         iter->ent_size = 0;
2657         return NULL;
2658 }
2659
2660 static struct trace_entry *
2661 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2662                   unsigned long *missing_events, u64 *ent_ts)
2663 {
2664         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2665         struct trace_entry *ent, *next = NULL;
2666         unsigned long lost_events = 0, next_lost = 0;
2667         int cpu_file = iter->cpu_file;
2668         u64 next_ts = 0, ts;
2669         int next_cpu = -1;
2670         int next_size = 0;
2671         int cpu;
2672
2673         /*
2674          * If we are in a per_cpu trace file, don't bother iterating over
2675          * all CPUs; just peek at that one CPU directly.
2676          */
2677         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2678                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2679                         return NULL;
2680                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2681                 if (ent_cpu)
2682                         *ent_cpu = cpu_file;
2683
2684                 return ent;
2685         }
2686
2687         for_each_tracing_cpu(cpu) {
2688
2689                 if (ring_buffer_empty_cpu(buffer, cpu))
2690                         continue;
2691
2692                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2693
2694                 /*
2695                  * Pick the entry with the smallest timestamp:
2696                  */
2697                 if (ent && (!next || ts < next_ts)) {
2698                         next = ent;
2699                         next_cpu = cpu;
2700                         next_ts = ts;
2701                         next_lost = lost_events;
2702                         next_size = iter->ent_size;
2703                 }
2704         }
2705
2706         iter->ent_size = next_size;
2707
2708         if (ent_cpu)
2709                 *ent_cpu = next_cpu;
2710
2711         if (ent_ts)
2712                 *ent_ts = next_ts;
2713
2714         if (missing_events)
2715                 *missing_events = next_lost;
2716
2717         return next;
2718 }
2719
2720 /* Find the next real entry, without updating the iterator itself */
2721 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2722                                           int *ent_cpu, u64 *ent_ts)
2723 {
2724         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2725 }
2726
2727 /* Find the next real entry, and increment the iterator to the next entry */
2728 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2729 {
2730         iter->ent = __find_next_entry(iter, &iter->cpu,
2731                                       &iter->lost_events, &iter->ts);
2732
2733         if (iter->ent)
2734                 trace_iterator_increment(iter);
2735
2736         return iter->ent ? iter : NULL;
2737 }
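/*
 * Example (illustrative sketch only): the basic walk used by the output
 * paths later in this file looks roughly like this, with "iter" assumed to
 * be a fully set up trace_iterator:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * Each successful call advances the iterator, so the loop ends once every
 * per-cpu buffer has been walked to the end.
 */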
2738
2739 static void trace_consume(struct trace_iterator *iter)
2740 {
2741         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2742                             &iter->lost_events);
2743 }
2744
2745 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2746 {
2747         struct trace_iterator *iter = m->private;
2748         int i = (int)*pos;
2749         void *ent;
2750
2751         WARN_ON_ONCE(iter->leftover);
2752
2753         (*pos)++;
2754
2755         /* can't go backwards */
2756         if (iter->idx > i)
2757                 return NULL;
2758
2759         if (iter->idx < 0)
2760                 ent = trace_find_next_entry_inc(iter);
2761         else
2762                 ent = iter;
2763
2764         while (ent && iter->idx < i)
2765                 ent = trace_find_next_entry_inc(iter);
2766
2767         iter->pos = *pos;
2768
2769         return ent;
2770 }
2771
2772 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2773 {
2774         struct ring_buffer_event *event;
2775         struct ring_buffer_iter *buf_iter;
2776         unsigned long entries = 0;
2777         u64 ts;
2778
2779         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2780
2781         buf_iter = trace_buffer_iter(iter, cpu);
2782         if (!buf_iter)
2783                 return;
2784
2785         ring_buffer_iter_reset(buf_iter);
2786
2787         /*
2788          * With the max latency tracers, we could have the case where
2789          * a reset never took place on a cpu. This is evidenced by the
2790          * timestamp being before the start of the buffer.
2791          */
2792         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2793                 if (ts >= iter->trace_buffer->time_start)
2794                         break;
2795                 entries++;
2796                 ring_buffer_read(buf_iter, NULL);
2797         }
2798
2799         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2800 }
2801
2802 /*
2803  * The current tracer is copied to avoid global locking
2804  * all around.
2805  */
2806 static void *s_start(struct seq_file *m, loff_t *pos)
2807 {
2808         struct trace_iterator *iter = m->private;
2809         struct trace_array *tr = iter->tr;
2810         int cpu_file = iter->cpu_file;
2811         void *p = NULL;
2812         loff_t l = 0;
2813         int cpu;
2814
2815         /*
2816          * Copy the tracer to avoid using a global lock all around.
2817          * iter->trace is a copy of current_trace; the name pointer can
2818          * be compared instead of using strcmp(), as iter->trace->name
2819          * will point to the same string as current_trace->name.
2820          */
2821         mutex_lock(&trace_types_lock);
2822         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2823                 *iter->trace = *tr->current_trace;
2824         mutex_unlock(&trace_types_lock);
2825
2826 #ifdef CONFIG_TRACER_MAX_TRACE
2827         if (iter->snapshot && iter->trace->use_max_tr)
2828                 return ERR_PTR(-EBUSY);
2829 #endif
2830
2831         if (*pos != iter->pos) {
2832                 iter->ent = NULL;
2833                 iter->cpu = 0;
2834                 iter->idx = -1;
2835
2836                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2837                         for_each_tracing_cpu(cpu)
2838                                 tracing_iter_reset(iter, cpu);
2839                 } else
2840                         tracing_iter_reset(iter, cpu_file);
2841
2842                 iter->leftover = 0;
2843                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2844                         ;
2845
2846         } else {
2847                 /*
2848                  * If we overflowed the seq_file before, then we want
2849                  * to just reuse the trace_seq buffer again.
2850                  */
2851                 if (iter->leftover)
2852                         p = iter;
2853                 else {
2854                         l = *pos - 1;
2855                         p = s_next(m, p, &l);
2856                 }
2857         }
2858
2859         trace_event_read_lock();
2860         trace_access_lock(cpu_file);
2861         return p;
2862 }
2863
2864 static void s_stop(struct seq_file *m, void *p)
2865 {
2866         struct trace_iterator *iter = m->private;
2867
2868 #ifdef CONFIG_TRACER_MAX_TRACE
2869         if (iter->snapshot && iter->trace->use_max_tr)
2870                 return;
2871 #endif
2872
2873         trace_access_unlock(iter->cpu_file);
2874         trace_event_read_unlock();
2875 }
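/*
 * Example (illustrative sketch only): these callbacks are wired into a
 * struct seq_operations, together with the s_show() routine defined further
 * down in this file, so that reading the "trace" file walks the buffer
 * entry by entry.  The wiring looks roughly like:
 *
 *	static const struct seq_operations tracer_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 */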
2876
2877 static void
2878 get_total_entries(struct trace_buffer *buf,
2879                   unsigned long *total, unsigned long *entries)
2880 {
2881         unsigned long count;
2882         int cpu;
2883
2884         *total = 0;
2885         *entries = 0;
2886
2887         for_each_tracing_cpu(cpu) {
2888                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2889                 /*
2890                  * If this buffer has skipped entries, then we hold all
2891                  * entries for the trace and we need to ignore the
2892                  * ones before the time stamp.
2893                  */
2894                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2895                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2896                         /* total is the same as the entries */
2897                         *total += count;
2898                 } else
2899                         *total += count +
2900                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2901                 *entries += count;
2902         }
2903 }
2904
2905 static void print_lat_help_header(struct seq_file *m)
2906 {
2907         seq_puts(m, "#                  _------=> CPU#            \n"
2908                     "#                 / _-----=> irqs-off        \n"
2909                     "#                | / _----=> need-resched    \n"
2910                     "#                || / _---=> hardirq/softirq \n"
2911                     "#                ||| / _--=> preempt-depth   \n"
2912                     "#                |||| /     delay            \n"
2913                     "#  cmd     pid   ||||| time  |   caller      \n"
2914                     "#     \\   /      |||||  \\    |   /         \n");
2915 }
2916
2917 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2918 {
2919         unsigned long total;
2920         unsigned long entries;
2921
2922         get_total_entries(buf, &total, &entries);
2923         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2924                    entries, total, num_online_cpus());
2925         seq_puts(m, "#\n");
2926 }
2927
2928 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2929 {
2930         print_event_info(buf, m);
2931         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2932                     "#              | |       |          |         |\n");
2933 }
2934
2935 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2936 {
2937         print_event_info(buf, m);
2938         seq_puts(m, "#                              _-----=> irqs-off\n"
2939                     "#                             / _----=> need-resched\n"
2940                     "#                            | / _---=> hardirq/softirq\n"
2941                     "#                            || / _--=> preempt-depth\n"
2942                     "#                            ||| /     delay\n"
2943                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2944                     "#              | |       |   ||||       |         |\n");
2945 }
2946
2947 void
2948 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2949 {
2950         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2951         struct trace_buffer *buf = iter->trace_buffer;
2952         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2953         struct tracer *type = iter->trace;
2954         unsigned long entries;
2955         unsigned long total;
2956         const char *name = "preemption";
2957
2958         name = type->name;
2959
2960         get_total_entries(buf, &total, &entries);
2961
2962         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2963                    name, UTS_RELEASE);
2964         seq_puts(m, "# -----------------------------------"
2965                  "---------------------------------\n");
2966         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2967                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2968                    nsecs_to_usecs(data->saved_latency),
2969                    entries,
2970                    total,
2971                    buf->cpu,
2972 #if defined(CONFIG_PREEMPT_NONE)
2973                    "server",
2974 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2975                    "desktop",
2976 #elif defined(CONFIG_PREEMPT)
2977                    "preempt",
2978 #else
2979                    "unknown",
2980 #endif
2981                    /* These are reserved for later use */
2982                    0, 0, 0, 0);
2983 #ifdef CONFIG_SMP
2984         seq_printf(m, " #P:%d)\n", num_online_cpus());
2985 #else
2986         seq_puts(m, ")\n");
2987 #endif
2988         seq_puts(m, "#    -----------------\n");
2989         seq_printf(m, "#    | task: %.16s-%d "
2990                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2991                    data->comm, data->pid,
2992                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2993                    data->policy, data->rt_priority);
2994         seq_puts(m, "#    -----------------\n");
2995
2996         if (data->critical_start) {
2997                 seq_puts(m, "#  => started at: ");
2998                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2999                 trace_print_seq(m, &iter->seq);
3000                 seq_puts(m, "\n#  => ended at:   ");
3001                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3002                 trace_print_seq(m, &iter->seq);
3003                 seq_puts(m, "\n#\n");
3004         }
3005
3006         seq_puts(m, "#\n");
3007 }
3008
3009 static void test_cpu_buff_start(struct trace_iterator *iter)
3010 {
3011         struct trace_seq *s = &iter->seq;
3012         struct trace_array *tr = iter->tr;
3013
3014         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3015                 return;
3016
3017         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3018                 return;
3019
3020         if (cpumask_available(iter->started) &&
3021             cpumask_test_cpu(iter->cpu, iter->started))
3022                 return;
3023
3024         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3025                 return;
3026
3027         if (cpumask_available(iter->started))
3028                 cpumask_set_cpu(iter->cpu, iter->started);
3029
3030         /* Don't print started cpu buffer for the first entry of the trace */
3031         if (iter->idx > 1)
3032                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3033                                 iter->cpu);
3034 }
3035
3036 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3037 {
3038         struct trace_array *tr = iter->tr;
3039         struct trace_seq *s = &iter->seq;
3040         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3041         struct trace_entry *entry;
3042         struct trace_event *event;
3043
3044         entry = iter->ent;
3045
3046         test_cpu_buff_start(iter);
3047
3048         event = ftrace_find_event(entry->type);
3049
3050         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3051                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3052                         trace_print_lat_context(iter);
3053                 else
3054                         trace_print_context(iter);
3055         }
3056
3057         if (trace_seq_has_overflowed(s))
3058                 return TRACE_TYPE_PARTIAL_LINE;
3059
3060         if (event)
3061                 return event->funcs->trace(iter, sym_flags, event);
3062
3063         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3064
3065         return trace_handle_return(s);
3066 }
3067
3068 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3069 {
3070         struct trace_array *tr = iter->tr;
3071         struct trace_seq *s = &iter->seq;
3072         struct trace_entry *entry;
3073         struct trace_event *event;
3074
3075         entry = iter->ent;
3076
3077         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3078                 trace_seq_printf(s, "%d %d %llu ",
3079                                  entry->pid, iter->cpu, iter->ts);
3080
3081         if (trace_seq_has_overflowed(s))
3082                 return TRACE_TYPE_PARTIAL_LINE;
3083
3084         event = ftrace_find_event(entry->type);
3085         if (event)
3086                 return event->funcs->raw(iter, 0, event);
3087
3088         trace_seq_printf(s, "%d ?\n", entry->type);
3089
3090         return trace_handle_return(s);
3091 }
3092
3093 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3094 {
3095         struct trace_array *tr = iter->tr;
3096         struct trace_seq *s = &iter->seq;
3097         unsigned char newline = '\n';
3098         struct trace_entry *entry;
3099         struct trace_event *event;
3100
3101         entry = iter->ent;
3102
3103         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3104                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3105                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3106                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3107                 if (trace_seq_has_overflowed(s))
3108                         return TRACE_TYPE_PARTIAL_LINE;
3109         }
3110
3111         event = ftrace_find_event(entry->type);
3112         if (event) {
3113                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3114                 if (ret != TRACE_TYPE_HANDLED)
3115                         return ret;
3116         }
3117
3118         SEQ_PUT_FIELD(s, newline);
3119
3120         return trace_handle_return(s);
3121 }
3122
3123 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3124 {
3125         struct trace_array *tr = iter->tr;
3126         struct trace_seq *s = &iter->seq;
3127         struct trace_entry *entry;
3128         struct trace_event *event;
3129
3130         entry = iter->ent;
3131
3132         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3133                 SEQ_PUT_FIELD(s, entry->pid);
3134                 SEQ_PUT_FIELD(s, iter->cpu);
3135                 SEQ_PUT_FIELD(s, iter->ts);
3136                 if (trace_seq_has_overflowed(s))
3137                         return TRACE_TYPE_PARTIAL_LINE;
3138         }
3139
3140         event = ftrace_find_event(entry->type);
3141         return event ? event->funcs->binary(iter, 0, event) :
3142                 TRACE_TYPE_HANDLED;
3143 }
3144
3145 int trace_empty(struct trace_iterator *iter)
3146 {
3147         struct ring_buffer_iter *buf_iter;
3148         int cpu;
3149
3150         /* If we are looking at one CPU buffer, only check that one */
3151         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3152                 cpu = iter->cpu_file;
3153                 buf_iter = trace_buffer_iter(iter, cpu);
3154                 if (buf_iter) {
3155                         if (!ring_buffer_iter_empty(buf_iter))
3156                                 return 0;
3157                 } else {
3158                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3159                                 return 0;
3160                 }
3161                 return 1;
3162         }
3163
3164         for_each_tracing_cpu(cpu) {
3165                 buf_iter = trace_buffer_iter(iter, cpu);
3166                 if (buf_iter) {
3167                         if (!ring_buffer_iter_empty(buf_iter))
3168                                 return 0;
3169                 } else {
3170                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3171                                 return 0;
3172                 }
3173         }
3174
3175         return 1;
3176 }
3177
3178 /*  Called with trace_event_read_lock() held. */
3179 enum print_line_t print_trace_line(struct trace_iterator *iter)
3180 {
3181         struct trace_array *tr = iter->tr;
3182         unsigned long trace_flags = tr->trace_flags;
3183         enum print_line_t ret;
3184
3185         if (iter->lost_events) {
3186                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3187                                  iter->cpu, iter->lost_events);
3188                 if (trace_seq_has_overflowed(&iter->seq))
3189                         return TRACE_TYPE_PARTIAL_LINE;
3190         }
3191
3192         if (iter->trace && iter->trace->print_line) {
3193                 ret = iter->trace->print_line(iter);
3194                 if (ret != TRACE_TYPE_UNHANDLED)
3195                         return ret;
3196         }
3197
3198         if (iter->ent->type == TRACE_BPUTS &&
3199                         trace_flags & TRACE_ITER_PRINTK &&
3200                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3201                 return trace_print_bputs_msg_only(iter);
3202
3203         if (iter->ent->type == TRACE_BPRINT &&
3204                         trace_flags & TRACE_ITER_PRINTK &&
3205                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3206                 return trace_print_bprintk_msg_only(iter);
3207
3208         if (iter->ent->type == TRACE_PRINT &&
3209                         trace_flags & TRACE_ITER_PRINTK &&
3210                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3211                 return trace_print_printk_msg_only(iter);
3212
3213         if (trace_flags & TRACE_ITER_BIN)
3214                 return print_bin_fmt(iter);
3215
3216         if (trace_flags & TRACE_ITER_HEX)
3217                 return print_hex_fmt(iter);
3218
3219         if (trace_flags & TRACE_ITER_RAW)
3220                 return print_raw_fmt(iter);
3221
3222         return print_trace_fmt(iter);
3223 }
3224
3225 void trace_latency_header(struct seq_file *m)
3226 {
3227         struct trace_iterator *iter = m->private;
3228         struct trace_array *tr = iter->tr;
3229
3230         /* print nothing if the buffers are empty */
3231         if (trace_empty(iter))
3232                 return;
3233
3234         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3235                 print_trace_header(m, iter);
3236
3237         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3238                 print_lat_help_header(m);
3239 }
3240
3241 void trace_default_header(struct seq_file *m)
3242 {
3243         struct trace_iterator *iter = m->private;
3244         struct trace_array *tr = iter->tr;
3245         unsigned long trace_flags = tr->trace_flags;
3246
3247         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3248                 return;
3249
3250         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3251                 /* print nothing if the buffers are empty */
3252                 if (trace_empty(iter))
3253                         return;
3254                 print_trace_header(m, iter);
3255                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3256                         print_lat_help_header(m);
3257         } else {
3258                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3259                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3260                                 print_func_help_header_irq(iter->trace_buffer, m);
3261                         else
3262                                 print_func_help_header(iter->trace_buffer, m);
3263                 }
3264         }
3265 }
3266
3267 static void test_ftrace_alive(struct seq_file *m)
3268 {
3269         if (!ftrace_is_dead())
3270                 return;
3271         seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3272                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3273 }
3274
3275 #ifdef CONFIG_TRACER_MAX_TRACE
3276 static void show_snapshot_main_help(struct seq_file *m)
3277 {
3278         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3279                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3280                     "#                      Takes a snapshot of the main buffer.\n"
3281                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3282                     "#                      (Doesn't have to be '2', works with any number that\n"
3283                     "#                       is not a '0' or '1')\n");
3284 }
3285
3286 static void show_snapshot_percpu_help(struct seq_file *m)
3287 {
3288         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3289 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3290         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3291                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3292 #else
3293         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3294                     "#                     Must use main snapshot file to allocate.\n");
3295 #endif
3296         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3297                     "#                      (Doesn't have to be '2', works with any number that\n"
3298                     "#                       is not a '0' or '1')\n");
3299 }
3300
3301 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3302 {
3303         if (iter->tr->allocated_snapshot)
3304                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3305         else
3306                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3307
3308         seq_puts(m, "# Snapshot commands:\n");
3309         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3310                 show_snapshot_main_help(m);
3311         else
3312                 show_snapshot_percpu_help(m);
3313 }
3314 #else
3315 /* Should never be called */
3316 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3317 #endif
3318
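/*
 * s_show - seq_file ->show() callback for the "trace" file.
 * With no current entry, print the "# tracer:" banner and the headers
 * (or the snapshot help text).  Otherwise flush any leftover seq_file
 * data from a previous overflow, or format the next trace entry.
 */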
3319 static int s_show(struct seq_file *m, void *v)
3320 {
3321         struct trace_iterator *iter = v;
3322         int ret;
3323
3324         if (iter->ent == NULL) {
3325                 if (iter->tr) {
3326                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3327                         seq_puts(m, "#\n");
3328                         test_ftrace_alive(m);
3329                 }
3330                 if (iter->snapshot && trace_empty(iter))
3331                         print_snapshot_help(m, iter);
3332                 else if (iter->trace && iter->trace->print_header)
3333                         iter->trace->print_header(m);
3334                 else
3335                         trace_default_header(m);
3336
3337         } else if (iter->leftover) {
3338                 /*
3339                  * If we filled the seq_file buffer earlier, we
3340                  * want to just show it now.
3341                  */
3342                 ret = trace_print_seq(m, &iter->seq);
3343
3344                 /* ret should this time be zero, but you never know */
3345                 iter->leftover = ret;
3346
3347         } else {
3348                 print_trace_line(iter);
3349                 ret = trace_print_seq(m, &iter->seq);
3350                 /*
3351                  * If we overflow the seq_file buffer, then it will
3352                  * ask us for this data again at start up.
3353                  * Use that instead.
3354                  *  ret is 0 if seq_file write succeeded.
3355                  *        -1 otherwise.
3356                  */
3357                 iter->leftover = ret;
3358         }
3359
3360         return 0;
3361 }
3362
3363 /*
3364  * Should be used after trace_array_get(); trace_types_lock
3365  * ensures that i_cdev was already initialized.
3366  */
3367 static inline int tracing_get_cpu(struct inode *inode)
3368 {
3369         if (inode->i_cdev) /* See trace_create_cpu_file() */
3370                 return (long)inode->i_cdev - 1;
3371         return RING_BUFFER_ALL_CPUS;
3372 }
3373
3374 static const struct seq_operations tracer_seq_ops = {
3375         .start          = s_start,
3376         .next           = s_next,
3377         .stop           = s_stop,
3378         .show           = s_show,
3379 };
3380
3381 static struct trace_iterator *
3382 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3383 {
3384         struct trace_array *tr = inode->i_private;
3385         struct trace_iterator *iter;
3386         int cpu;
3387
3388         if (tracing_disabled)
3389                 return ERR_PTR(-ENODEV);
3390
3391         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3392         if (!iter)
3393                 return ERR_PTR(-ENOMEM);
3394
3395         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3396                                     GFP_KERNEL);
3397         if (!iter->buffer_iter)
3398                 goto release;
3399
3400         /*
3401          * We make a copy of the current tracer to avoid concurrent
3402          * changes on it while we are reading.
3403          */
3404         mutex_lock(&trace_types_lock);
3405         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3406         if (!iter->trace)
3407                 goto fail;
3408
3409         *iter->trace = *tr->current_trace;
3410
3411         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3412                 goto fail;
3413
3414         iter->tr = tr;
3415
3416 #ifdef CONFIG_TRACER_MAX_TRACE
3417         /* Currently only the top directory has a snapshot */
3418         if (tr->current_trace->print_max || snapshot)
3419                 iter->trace_buffer = &tr->max_buffer;
3420         else
3421 #endif
3422                 iter->trace_buffer = &tr->trace_buffer;
3423         iter->snapshot = snapshot;
3424         iter->pos = -1;
3425         iter->cpu_file = tracing_get_cpu(inode);
3426         mutex_init(&iter->mutex);
3427
3428         /* Notify the tracer early; before we stop tracing. */
3429         if (iter->trace && iter->trace->open)
3430                 iter->trace->open(iter);
3431
3432         /* Annotate start of buffers if we had overruns */
3433         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3434                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3435
3436         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3437         if (trace_clocks[tr->clock_id].in_ns)
3438                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3439
3440         /* stop the trace while dumping if we are not opening "snapshot" */
3441         if (!iter->snapshot)
3442                 tracing_stop_tr(tr);
3443
3444         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3445                 for_each_tracing_cpu(cpu) {
3446                         iter->buffer_iter[cpu] =
3447                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3448                                                          cpu, GFP_KERNEL);
3449                 }
3450                 ring_buffer_read_prepare_sync();
3451                 for_each_tracing_cpu(cpu) {
3452                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3453                         tracing_iter_reset(iter, cpu);
3454                 }
3455         } else {
3456                 cpu = iter->cpu_file;
3457                 iter->buffer_iter[cpu] =
3458                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3459                                                  cpu, GFP_KERNEL);
3460                 ring_buffer_read_prepare_sync();
3461                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3462                 tracing_iter_reset(iter, cpu);
3463         }
3464
3465         mutex_unlock(&trace_types_lock);
3466
3467         return iter;
3468
3469  fail:
3470         mutex_unlock(&trace_types_lock);
3471         kfree(iter->trace);
3472         kfree(iter->buffer_iter);
3473 release:
3474         seq_release_private(inode, file);
3475         return ERR_PTR(-ENOMEM);
3476 }
3477
3478 int tracing_open_generic(struct inode *inode, struct file *filp)
3479 {
3480         if (tracing_disabled)
3481                 return -ENODEV;
3482
3483         filp->private_data = inode->i_private;
3484         return 0;
3485 }
3486
3487 bool tracing_is_disabled(void)
3488 {
3489         return tracing_disabled ? true : false;
3490 }
3491
3492 /*
3493  * Open and update trace_array ref count.
3494  * Must have the current trace_array passed to it.
3495  */
3496 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3497 {
3498         struct trace_array *tr = inode->i_private;
3499
3500         if (tracing_disabled)
3501                 return -ENODEV;
3502
3503         if (trace_array_get(tr) < 0)
3504                 return -ENODEV;
3505
3506         filp->private_data = inode->i_private;
3507
3508         return 0;
3509 }
3510
3511 static int tracing_release(struct inode *inode, struct file *file)
3512 {
3513         struct trace_array *tr = inode->i_private;
3514         struct seq_file *m = file->private_data;
3515         struct trace_iterator *iter;
3516         int cpu;
3517
3518         if (!(file->f_mode & FMODE_READ)) {
3519                 trace_array_put(tr);
3520                 return 0;
3521         }
3522
3523         /* Writes do not use seq_file */
3524         iter = m->private;
3525         mutex_lock(&trace_types_lock);
3526
3527         for_each_tracing_cpu(cpu) {
3528                 if (iter->buffer_iter[cpu])
3529                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3530         }
3531
3532         if (iter->trace && iter->trace->close)
3533                 iter->trace->close(iter);
3534
3535         if (!iter->snapshot)
3536                 /* reenable tracing if it was previously enabled */
3537                 tracing_start_tr(tr);
3538
3539         __trace_array_put(tr);
3540
3541         mutex_unlock(&trace_types_lock);
3542
3543         mutex_destroy(&iter->mutex);
3544         free_cpumask_var(iter->started);
3545         kfree(iter->trace);
3546         kfree(iter->buffer_iter);
3547         seq_release_private(inode, file);
3548
3549         return 0;
3550 }
3551
3552 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3553 {
3554         struct trace_array *tr = inode->i_private;
3555
3556         trace_array_put(tr);
3557         return 0;
3558 }
3559
3560 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3561 {
3562         struct trace_array *tr = inode->i_private;
3563
3564         trace_array_put(tr);
3565
3566         return single_release(inode, file);
3567 }
3568
3569 static int tracing_open(struct inode *inode, struct file *file)
3570 {
3571         struct trace_array *tr = inode->i_private;
3572         struct trace_iterator *iter;
3573         int ret = 0;
3574
3575         if (trace_array_get(tr) < 0)
3576                 return -ENODEV;
3577
3578         /* If this file was open for write, then erase contents */
3579         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3580                 int cpu = tracing_get_cpu(inode);
3581                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3582
3583 #ifdef CONFIG_TRACER_MAX_TRACE
3584                 if (tr->current_trace->print_max)
3585                         trace_buf = &tr->max_buffer;
3586 #endif
3587
3588                 if (cpu == RING_BUFFER_ALL_CPUS)
3589                         tracing_reset_online_cpus(trace_buf);
3590                 else
3591                         tracing_reset(trace_buf, cpu);
3592         }
3593
3594         if (file->f_mode & FMODE_READ) {
3595                 iter = __tracing_open(inode, file, false);
3596                 if (IS_ERR(iter))
3597                         ret = PTR_ERR(iter);
3598                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3599                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3600         }
3601
3602         if (ret < 0)
3603                 trace_array_put(tr);
3604
3605         return ret;
3606 }
3607
3608 /*
3609  * Some tracers are not suitable for instance buffers.
3610  * A tracer is always available for the global array (toplevel)
3611  * or if it explicitly states that it is.
3612  */
3613 static bool
3614 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3615 {
3616         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3617 }
3618
3619 /* Find the next tracer that this trace array may use */
3620 static struct tracer *
3621 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3622 {
3623         while (t && !trace_ok_for_array(t, tr))
3624                 t = t->next;
3625
3626         return t;
3627 }
3628
3629 static void *
3630 t_next(struct seq_file *m, void *v, loff_t *pos)
3631 {
3632         struct trace_array *tr = m->private;
3633         struct tracer *t = v;
3634
3635         (*pos)++;
3636
3637         if (t)
3638                 t = get_tracer_for_array(tr, t->next);
3639
3640         return t;
3641 }
3642
3643 static void *t_start(struct seq_file *m, loff_t *pos)
3644 {
3645         struct trace_array *tr = m->private;
3646         struct tracer *t;
3647         loff_t l = 0;
3648
3649         mutex_lock(&trace_types_lock);
3650
3651         t = get_tracer_for_array(tr, trace_types);
3652         for (; t && l < *pos; t = t_next(m, t, &l))
3653                         ;
3654
3655         return t;
3656 }
3657
3658 static void t_stop(struct seq_file *m, void *p)
3659 {
3660         mutex_unlock(&trace_types_lock);
3661 }
3662
3663 static int t_show(struct seq_file *m, void *v)
3664 {
3665         struct tracer *t = v;
3666
3667         if (!t)
3668                 return 0;
3669
3670         seq_puts(m, t->name);
3671         if (t->next)
3672                 seq_putc(m, ' ');
3673         else
3674                 seq_putc(m, '\n');
3675
3676         return 0;
3677 }
3678
3679 static const struct seq_operations show_traces_seq_ops = {
3680         .start          = t_start,
3681         .next           = t_next,
3682         .stop           = t_stop,
3683         .show           = t_show,
3684 };
3685
3686 static int show_traces_open(struct inode *inode, struct file *file)
3687 {
3688         struct trace_array *tr = inode->i_private;
3689         struct seq_file *m;
3690         int ret;
3691
3692         if (tracing_disabled)
3693                 return -ENODEV;
3694
3695         if (trace_array_get(tr) < 0)
3696                 return -ENODEV;
3697
3698         ret = seq_open(file, &show_traces_seq_ops);
3699         if (ret) {
3700                 trace_array_put(tr);
3701                 return ret;
3702         }
3703
3704         m = file->private_data;
3705         m->private = tr;
3706
3707         return 0;
3708 }
3709
3710 static int show_traces_release(struct inode *inode, struct file *file)
3711 {
3712         struct trace_array *tr = inode->i_private;
3713
3714         trace_array_put(tr);
3715         return seq_release(inode, file);
3716 }
3717
3718 static ssize_t
3719 tracing_write_stub(struct file *filp, const char __user *ubuf,
3720                    size_t count, loff_t *ppos)
3721 {
3722         return count;
3723 }
3724
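/*
 * tracing_lseek - llseek for the tracing files.
 * Readers go through the normal seq_file llseek; files opened
 * write-only have no seq_file, so just reset the position.
 */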
3725 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3726 {
3727         int ret;
3728
3729         if (file->f_mode & FMODE_READ)
3730                 ret = seq_lseek(file, offset, whence);
3731         else
3732                 file->f_pos = ret = 0;
3733
3734         return ret;
3735 }
3736
3737 static const struct file_operations tracing_fops = {
3738         .open           = tracing_open,
3739         .read           = seq_read,
3740         .write          = tracing_write_stub,
3741         .llseek         = tracing_lseek,
3742         .release        = tracing_release,
3743 };
3744
3745 static const struct file_operations show_traces_fops = {
3746         .open           = show_traces_open,
3747         .read           = seq_read,
3748         .llseek         = seq_lseek,
3749         .release        = show_traces_release,
3750 };
3751
3752 static ssize_t
3753 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3754                      size_t count, loff_t *ppos)
3755 {
3756         struct trace_array *tr = file_inode(filp)->i_private;
3757         char *mask_str;
3758         int len;
3759
3760         len = snprintf(NULL, 0, "%*pb\n",
3761                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3762         mask_str = kmalloc(len, GFP_KERNEL);
3763         if (!mask_str)
3764                 return -ENOMEM;
3765
3766         len = snprintf(mask_str, len, "%*pb\n",
3767                        cpumask_pr_args(tr->tracing_cpumask));
3768         if (len >= count) {
3769                 count = -EINVAL;
3770                 goto out_err;
3771         }
3772         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3773
3774 out_err:
3775         kfree(mask_str);
3776
3777         return count;
3778 }
3779
3780 static ssize_t
3781 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3782                       size_t count, loff_t *ppos)
3783 {
3784         struct trace_array *tr = file_inode(filp)->i_private;
3785         cpumask_var_t tracing_cpumask_new;
3786         int err, cpu;
3787
3788         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3789                 return -ENOMEM;
3790
3791         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3792         if (err)
3793                 goto err_unlock;
3794
3795         local_irq_disable();
3796         arch_spin_lock(&tr->max_lock);
3797         for_each_tracing_cpu(cpu) {
3798                 /*
3799                  * Increase/decrease the disabled counter if we are
3800                  * about to flip a bit in the cpumask:
3801                  */
3802                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3803                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3804                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3805                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3806                 }
3807                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3808                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3809                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3810                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3811                 }
3812         }
3813         arch_spin_unlock(&tr->max_lock);
3814         local_irq_enable();
3815
3816         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3817         free_cpumask_var(tracing_cpumask_new);
3818
3819         return count;
3820
3821 err_unlock:
3822         free_cpumask_var(tracing_cpumask_new);
3823
3824         return err;
3825 }
3826
3827 static const struct file_operations tracing_cpumask_fops = {
3828         .open           = tracing_open_generic_tr,
3829         .read           = tracing_cpumask_read,
3830         .write          = tracing_cpumask_write,
3831         .release        = tracing_release_generic_tr,
3832         .llseek         = generic_file_llseek,
3833 };
3834
3835 static int tracing_trace_options_show(struct seq_file *m, void *v)
3836 {
3837         struct tracer_opt *trace_opts;
3838         struct trace_array *tr = m->private;
3839         u32 tracer_flags;
3840         int i;
3841
3842         mutex_lock(&trace_types_lock);
3843         tracer_flags = tr->current_trace->flags->val;
3844         trace_opts = tr->current_trace->flags->opts;
3845
3846         for (i = 0; trace_options[i]; i++) {
3847                 if (tr->trace_flags & (1 << i))
3848                         seq_printf(m, "%s\n", trace_options[i]);
3849                 else
3850                         seq_printf(m, "no%s\n", trace_options[i]);
3851         }
3852
3853         for (i = 0; trace_opts[i].name; i++) {
3854                 if (tracer_flags & trace_opts[i].bit)
3855                         seq_printf(m, "%s\n", trace_opts[i].name);
3856                 else
3857                         seq_printf(m, "no%s\n", trace_opts[i].name);
3858         }
3859         mutex_unlock(&trace_types_lock);
3860
3861         return 0;
3862 }
3863
3864 static int __set_tracer_option(struct trace_array *tr,
3865                                struct tracer_flags *tracer_flags,
3866                                struct tracer_opt *opts, int neg)
3867 {
3868         struct tracer *trace = tracer_flags->trace;
3869         int ret;
3870
3871         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3872         if (ret)
3873                 return ret;
3874
3875         if (neg)
3876                 tracer_flags->val &= ~opts->bit;
3877         else
3878                 tracer_flags->val |= opts->bit;
3879         return 0;
3880 }
3881
3882 /* Try to assign a tracer specific option */
3883 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3884 {
3885         struct tracer *trace = tr->current_trace;
3886         struct tracer_flags *tracer_flags = trace->flags;
3887         struct tracer_opt *opts = NULL;
3888         int i;
3889
3890         for (i = 0; tracer_flags->opts[i].name; i++) {
3891                 opts = &tracer_flags->opts[i];
3892
3893                 if (strcmp(cmp, opts->name) == 0)
3894                         return __set_tracer_option(tr, trace->flags, opts, neg);
3895         }
3896
3897         return -EINVAL;
3898 }
3899
3900 /* Some tracers require overwrite to stay enabled */
3901 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3902 {
3903         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3904                 return -1;
3905
3906         return 0;
3907 }
3908
3909 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3910 {
3911         /* do nothing if flag is already set */
3912         if (!!(tr->trace_flags & mask) == !!enabled)
3913                 return 0;
3914
3915         /* Give the tracer a chance to approve the change */
3916         if (tr->current_trace->flag_changed)
3917                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3918                         return -EINVAL;
3919
3920         if (enabled)
3921                 tr->trace_flags |= mask;
3922         else
3923                 tr->trace_flags &= ~mask;
3924
3925         if (mask == TRACE_ITER_RECORD_CMD)
3926                 trace_event_enable_cmd_record(enabled);
3927
3928         if (mask == TRACE_ITER_EVENT_FORK)
3929                 trace_event_follow_fork(tr, enabled);
3930
3931         if (mask == TRACE_ITER_OVERWRITE) {
3932                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3935 #endif
3936         }
3937
3938         if (mask == TRACE_ITER_PRINTK) {
3939                 trace_printk_start_stop_comm(enabled);
3940                 trace_printk_control(enabled);
3941         }
3942
3943         return 0;
3944 }
3945
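/*
 * Apply a single option string: strip it, handle a leading "no" to
 * clear the flag, and try the core trace_options before falling back
 * to the current tracer's private options.  The '\0' that strstrip()
 * may have written over trailing whitespace is turned back into a
 * space so the caller's buffer can be parsed again.
 */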
3946 static int trace_set_options(struct trace_array *tr, char *option)
3947 {
3948         char *cmp;
3949         int neg = 0;
3950         int ret = -ENODEV;
3951         int i;
3952         size_t orig_len = strlen(option);
3953
3954         cmp = strstrip(option);
3955
3956         if (strncmp(cmp, "no", 2) == 0) {
3957                 neg = 1;
3958                 cmp += 2;
3959         }
3960
3961         mutex_lock(&trace_types_lock);
3962
3963         for (i = 0; trace_options[i]; i++) {
3964                 if (strcmp(cmp, trace_options[i]) == 0) {
3965                         ret = set_tracer_flag(tr, 1 << i, !neg);
3966                         break;
3967                 }
3968         }
3969
3970         /* If no option could be set, test the specific tracer options */
3971         if (!trace_options[i])
3972                 ret = set_tracer_option(tr, cmp, neg);
3973
3974         mutex_unlock(&trace_types_lock);
3975
3976         /*
3977          * If the first trailing whitespace is replaced with '\0' by strstrip,
3978          * turn it back into a space.
3979          */
3980         if (orig_len > strlen(option))
3981                 option[strlen(option)] = ' ';
3982
3983         return ret;
3984 }
3985
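/*
 * Walk the comma-separated boot-time trace_options string and apply
 * each option to the global trace array.
 */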
3986 static void __init apply_trace_boot_options(void)
3987 {
3988         char *buf = trace_boot_options_buf;
3989         char *option;
3990
3991         while (true) {
3992                 option = strsep(&buf, ",");
3993
3994                 if (!option)
3995                         break;
3996
3997                 if (*option)
3998                         trace_set_options(&global_trace, option);
3999
4000                 /* Put back the comma to allow this to be called again */
4001                 if (buf)
4002                         *(buf - 1) = ',';
4003         }
4004 }
4005
4006 static ssize_t
4007 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4008                         size_t cnt, loff_t *ppos)
4009 {
4010         struct seq_file *m = filp->private_data;
4011         struct trace_array *tr = m->private;
4012         char buf[64];
4013         int ret;
4014
4015         if (cnt >= sizeof(buf))
4016                 return -EINVAL;
4017
4018         if (copy_from_user(buf, ubuf, cnt))
4019                 return -EFAULT;
4020
4021         buf[cnt] = 0;
4022
4023         ret = trace_set_options(tr, buf);
4024         if (ret < 0)
4025                 return ret;
4026
4027         *ppos += cnt;
4028
4029         return cnt;
4030 }
4031
4032 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4033 {
4034         struct trace_array *tr = inode->i_private;
4035         int ret;
4036
4037         if (tracing_disabled)
4038                 return -ENODEV;
4039
4040         if (trace_array_get(tr) < 0)
4041                 return -ENODEV;
4042
4043         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4044         if (ret < 0)
4045                 trace_array_put(tr);
4046
4047         return ret;
4048 }
4049
4050 static const struct file_operations tracing_iter_fops = {
4051         .open           = tracing_trace_options_open,
4052         .read           = seq_read,
4053         .llseek         = seq_lseek,
4054         .release        = tracing_single_release_tr,
4055         .write          = tracing_trace_options_write,
4056 };
4057
4058 static const char readme_msg[] =
4059         "tracing mini-HOWTO:\n\n"
4060         "# echo 0 > tracing_on : quick way to disable tracing\n"
4061         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4062         " Important files:\n"
4063         "  trace\t\t\t- The static contents of the buffer\n"
4064         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4065         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4066         "  current_tracer\t- function and latency tracers\n"
4067         "  available_tracers\t- list of configured tracers for current_tracer\n"
4068         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4069         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4070         "  trace_clock\t\t- change the clock used to order events\n"
4071         "       local:   Per cpu clock but may not be synced across CPUs\n"
4072         "      global:   Synced across CPUs but slows tracing down.\n"
4073         "     counter:   Not a clock, but just an increment\n"
4074         "      uptime:   Jiffy counter from time of boot\n"
4075         "        perf:   Same clock that perf events use\n"
4076 #ifdef CONFIG_X86_64
4077         "     x86-tsc:   TSC cycle counter\n"
4078 #endif
4079         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4080         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4081         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4082         "\t\t\t  Remove sub-buffer with rmdir\n"
4083         "  trace_options\t\t- Set format or modify how tracing happens\n"
4084         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4085         "\t\t\t  option name\n"
4086         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4087 #ifdef CONFIG_DYNAMIC_FTRACE
4088         "\n  available_filter_functions - list of functions that can be filtered on\n"
4089         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4090         "\t\t\t  functions\n"
4091         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4092         "\t     modules: Can select a group via module\n"
4093         "\t      Format: :mod:<module-name>\n"
4094         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4095         "\t    triggers: a command to perform when function is hit\n"
4096         "\t      Format: <function>:<trigger>[:count]\n"
4097         "\t     trigger: traceon, traceoff\n"
4098         "\t\t      enable_event:<system>:<event>\n"
4099         "\t\t      disable_event:<system>:<event>\n"
4100 #ifdef CONFIG_STACKTRACE
4101         "\t\t      stacktrace\n"
4102 #endif
4103 #ifdef CONFIG_TRACER_SNAPSHOT
4104         "\t\t      snapshot\n"
4105 #endif
4106         "\t\t      dump\n"
4107         "\t\t      cpudump\n"
4108         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4109         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4110         "\t     The first one will disable tracing every time do_fault is hit\n"
4111         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4112         "\t       The first time do_trap is hit and it disables tracing, the\n"
4113         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4114         "\t       the counter will not decrement. It only decrements when the\n"
4115         "\t       trigger did work\n"
4116         "\t     To remove trigger without count:\n"
4117         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4118         "\t     To remove trigger with a count:\n"
4119         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4120         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4121         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4122         "\t    modules: Can select a group via module command :mod:\n"
4123         "\t    Does not accept triggers\n"
4124 #endif /* CONFIG_DYNAMIC_FTRACE */
4125 #ifdef CONFIG_FUNCTION_TRACER
4126         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4127         "\t\t    (function)\n"
4128 #endif
4129 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4130         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4131         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4132         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4133 #endif
4134 #ifdef CONFIG_TRACER_SNAPSHOT
4135         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4136         "\t\t\t  snapshot buffer. Read the contents for more\n"
4137         "\t\t\t  information\n"
4138 #endif
4139 #ifdef CONFIG_STACK_TRACER
4140         "  stack_trace\t\t- Shows the max stack trace when active\n"
4141         "  stack_max_size\t- Shows current max stack size that was traced\n"
4142         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4143         "\t\t\t  new trace)\n"
4144 #ifdef CONFIG_DYNAMIC_FTRACE
4145         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4146         "\t\t\t  traces\n"
4147 #endif
4148 #endif /* CONFIG_STACK_TRACER */
4149 #ifdef CONFIG_KPROBE_EVENT
4150         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4151         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4152 #endif
4153 #ifdef CONFIG_UPROBE_EVENT
4154         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4155         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4156 #endif
4157 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4158         "\t  accepts: event-definitions (one definition per line)\n"
4159         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4160         "\t           -:[<group>/]<event>\n"
4161 #ifdef CONFIG_KPROBE_EVENT
4162         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4163 #endif
4164 #ifdef CONFIG_UPROBE_EVENT
4165         "\t    place: <path>:<offset>\n"
4166 #endif
4167         "\t     args: <name>=fetcharg[:type]\n"
4168         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4169         "\t           $stack<index>, $stack, $retval, $comm\n"
4170         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4171         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4172 #endif
4173         "  events/\t\t- Directory containing all trace event subsystems:\n"
4174         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4175         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4176         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4177         "\t\t\t  events\n"
4178         "      filter\t\t- If set, only events passing filter are traced\n"
4179         "  events/<system>/<event>/\t- Directory containing control files for\n"
4180         "\t\t\t  <event>:\n"
4181         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4182         "      filter\t\t- If set, only events passing filter are traced\n"
4183         "      trigger\t\t- If set, a command to perform when event is hit\n"
4184         "\t    Format: <trigger>[:count][if <filter>]\n"
4185         "\t   trigger: traceon, traceoff\n"
4186         "\t            enable_event:<system>:<event>\n"
4187         "\t            disable_event:<system>:<event>\n"
4188 #ifdef CONFIG_HIST_TRIGGERS
4189         "\t            enable_hist:<system>:<event>\n"
4190         "\t            disable_hist:<system>:<event>\n"
4191 #endif
4192 #ifdef CONFIG_STACKTRACE
4193         "\t\t    stacktrace\n"
4194 #endif
4195 #ifdef CONFIG_TRACER_SNAPSHOT
4196         "\t\t    snapshot\n"
4197 #endif
4198 #ifdef CONFIG_HIST_TRIGGERS
4199         "\t\t    hist (see below)\n"
4200 #endif
4201         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4202         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4203         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4204         "\t                  events/block/block_unplug/trigger\n"
4205         "\t   The first disables tracing every time block_unplug is hit.\n"
4206         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4207         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4208         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4209         "\t   Like function triggers, the counter is only decremented if it\n"
4210         "\t    enabled or disabled tracing.\n"
4211         "\t   To remove a trigger without a count:\n"
4212         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4213         "\t   To remove a trigger with a count:\n"
4214         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4215         "\t   Filters can be ignored when removing a trigger.\n"
4216 #ifdef CONFIG_HIST_TRIGGERS
4217         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4218         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4219         "\t            [:values=<field1[,field2,...]>]\n"
4220         "\t            [:sort=<field1[,field2,...]>]\n"
4221         "\t            [:size=#entries]\n"
4222         "\t            [:pause][:continue][:clear]\n"
4223         "\t            [:name=histname1]\n"
4224         "\t            [if <filter>]\n\n"
4225         "\t    When a matching event is hit, an entry is added to a hash\n"
4226         "\t    table using the key(s) and value(s) named, and the value of a\n"
4227         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4228         "\t    correspond to fields in the event's format description.  Keys\n"
4229         "\t    can be any field, or the special string 'stacktrace'.\n"
4230         "\t    Compound keys consisting of up to two fields can be specified\n"
4231         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4232         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4233         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4234         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4235         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4236         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4237         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4238         "\t    its histogram data will be shared with other triggers of the\n"
4239         "\t    same name, and trigger hits will update this common data.\n\n"
4240         "\t    Reading the 'hist' file for the event will dump the hash\n"
4241         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4242         "\t    triggers attached to an event, there will be a table for each\n"
4243         "\t    trigger in the output.  The table displayed for a named\n"
4244         "\t    trigger will be the same as any other instance having the\n"
4245         "\t    same name.  The default format used to display a given field\n"
4246         "\t    can be modified by appending any of the following modifiers\n"
4247         "\t    to the field name, as applicable:\n\n"
4248         "\t            .hex        display a number as a hex value\n"
4249         "\t            .sym        display an address as a symbol\n"
4250         "\t            .sym-offset display an address as a symbol and offset\n"
4251         "\t            .execname   display a common_pid as a program name\n"
4252         "\t            .syscall    display a syscall id as a syscall name\n"
4253         "\t            .log2       display log2 value rather than raw number\n\n"
4254         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4255         "\t    trigger or to start a hist trigger but not log any events\n"
4256         "\t    until told to do so.  'continue' can be used to start or\n"
4257         "\t    restart a paused hist trigger.\n\n"
4258         "\t    The 'clear' parameter will clear the contents of a running\n"
4259         "\t    hist trigger and leave its current paused/active state\n"
4260         "\t    unchanged.\n\n"
4261         "\t    The enable_hist and disable_hist triggers can be used to\n"
4262         "\t    have one event conditionally start and stop another event's\n"
4263         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4264         "\t    the enable_event and disable_event triggers.\n"
4265 #endif
4266 ;
4267
4268 static ssize_t
4269 tracing_readme_read(struct file *filp, char __user *ubuf,
4270                        size_t cnt, loff_t *ppos)
4271 {
4272         return simple_read_from_buffer(ubuf, cnt, ppos,
4273                                         readme_msg, strlen(readme_msg));
4274 }
4275
4276 static const struct file_operations tracing_readme_fops = {
4277         .open           = tracing_open_generic,
4278         .read           = tracing_readme_read,
4279         .llseek         = generic_file_llseek,
4280 };
4281
4282 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4283 {
4284         unsigned int *ptr = v;
4285
4286         if (*pos || m->count)
4287                 ptr++;
4288
4289         (*pos)++;
4290
4291         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4292              ptr++) {
4293                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4294                         continue;
4295
4296                 return ptr;
4297         }
4298
4299         return NULL;
4300 }
4301
4302 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4303 {
4304         void *v;
4305         loff_t l = 0;
4306
4307         preempt_disable();
4308         arch_spin_lock(&trace_cmdline_lock);
4309
4310         v = &savedcmd->map_cmdline_to_pid[0];
4311         while (l <= *pos) {
4312                 v = saved_cmdlines_next(m, v, &l);
4313                 if (!v)
4314                         return NULL;
4315         }
4316
4317         return v;
4318 }
4319
4320 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4321 {
4322         arch_spin_unlock(&trace_cmdline_lock);
4323         preempt_enable();
4324 }
4325
4326 static int saved_cmdlines_show(struct seq_file *m, void *v)
4327 {
4328         char buf[TASK_COMM_LEN];
4329         unsigned int *pid = v;
4330
4331         __trace_find_cmdline(*pid, buf);
4332         seq_printf(m, "%d %s\n", *pid, buf);
4333         return 0;
4334 }
4335
4336 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4337         .start          = saved_cmdlines_start,
4338         .next           = saved_cmdlines_next,
4339         .stop           = saved_cmdlines_stop,
4340         .show           = saved_cmdlines_show,
4341 };
4342
4343 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4344 {
4345         if (tracing_disabled)
4346                 return -ENODEV;
4347
4348         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4349 }
4350
4351 static const struct file_operations tracing_saved_cmdlines_fops = {
4352         .open           = tracing_saved_cmdlines_open,
4353         .read           = seq_read,
4354         .llseek         = seq_lseek,
4355         .release        = seq_release,
4356 };
4357
4358 static ssize_t
4359 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4360                                  size_t cnt, loff_t *ppos)
4361 {
4362         char buf[64];
4363         int r;
4364
4365         arch_spin_lock(&trace_cmdline_lock);
4366         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4367         arch_spin_unlock(&trace_cmdline_lock);
4368
4369         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4370 }
4371
4372 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4373 {
4374         kfree(s->saved_cmdlines);
4375         kfree(s->map_cmdline_to_pid);
4376         kfree(s);
4377 }
4378
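/*
 * Allocate a new saved_cmdlines buffer with room for @val entries and
 * swap it in under trace_cmdline_lock, freeing the old buffer.  Called
 * when user space writes to saved_cmdlines_size.
 */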
4379 static int tracing_resize_saved_cmdlines(unsigned int val)
4380 {
4381         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4382
4383         s = kmalloc(sizeof(*s), GFP_KERNEL);
4384         if (!s)
4385                 return -ENOMEM;
4386
4387         if (allocate_cmdlines_buffer(val, s) < 0) {
4388                 kfree(s);
4389                 return -ENOMEM;
4390         }
4391
4392         arch_spin_lock(&trace_cmdline_lock);
4393         savedcmd_temp = savedcmd;
4394         savedcmd = s;
4395         arch_spin_unlock(&trace_cmdline_lock);
4396         free_saved_cmdlines_buffer(savedcmd_temp);
4397
4398         return 0;
4399 }
4400
4401 static ssize_t
4402 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4403                                   size_t cnt, loff_t *ppos)
4404 {
4405         unsigned long val;
4406         int ret;
4407
4408         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4409         if (ret)
4410                 return ret;
4411
4412         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4413         if (!val || val > PID_MAX_DEFAULT)
4414                 return -EINVAL;
4415
4416         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4417         if (ret < 0)
4418                 return ret;
4419
4420         *ppos += cnt;
4421
4422         return cnt;
4423 }
4424
4425 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4426         .open           = tracing_open_generic,
4427         .read           = tracing_saved_cmdlines_size_read,
4428         .write          = tracing_saved_cmdlines_size_write,
4429 };
4430
4431 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
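/*
 * If @ptr has walked onto the tail marker of a map block (its
 * enum_string is NULL), follow tail.next to the next block and skip
 * over that block's head item.
 */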
4432 static union trace_enum_map_item *
4433 update_enum_map(union trace_enum_map_item *ptr)
4434 {
4435         if (!ptr->map.enum_string) {
4436                 if (ptr->tail.next) {
4437                         ptr = ptr->tail.next;
4438                         /* Set ptr to the next real item (skip head) */
4439                         ptr++;
4440                 } else
4441                         return NULL;
4442         }
4443         return ptr;
4444 }
4445
4446 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4447 {
4448         union trace_enum_map_item *ptr = v;
4449
4450         /*
4451          * Paranoid! If ptr points to end, we don't want to increment past it.
4452          * This really should never happen.
4453          */
4454         ptr = update_enum_map(ptr);
4455         if (WARN_ON_ONCE(!ptr))
4456                 return NULL;
4457
4458         ptr++;
4459
4460         (*pos)++;
4461
4462         ptr = update_enum_map(ptr);
4463
4464         return ptr;
4465 }
4466
4467 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4468 {
4469         union trace_enum_map_item *v;
4470         loff_t l = 0;
4471
4472         mutex_lock(&trace_enum_mutex);
4473
4474         v = trace_enum_maps;
4475         if (v)
4476                 v++;
4477
4478         while (v && l < *pos) {
4479                 v = enum_map_next(m, v, &l);
4480         }
4481
4482         return v;
4483 }
4484
4485 static void enum_map_stop(struct seq_file *m, void *v)
4486 {
4487         mutex_unlock(&trace_enum_mutex);
4488 }
4489
4490 static int enum_map_show(struct seq_file *m, void *v)
4491 {
4492         union trace_enum_map_item *ptr = v;
4493
4494         seq_printf(m, "%s %ld (%s)\n",
4495                    ptr->map.enum_string, ptr->map.enum_value,
4496                    ptr->map.system);
4497
4498         return 0;
4499 }
4500
4501 static const struct seq_operations tracing_enum_map_seq_ops = {
4502         .start          = enum_map_start,
4503         .next           = enum_map_next,
4504         .stop           = enum_map_stop,
4505         .show           = enum_map_show,
4506 };
4507
4508 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4509 {
4510         if (tracing_disabled)
4511                 return -ENODEV;
4512
4513         return seq_open(filp, &tracing_enum_map_seq_ops);
4514 }
4515
4516 static const struct file_operations tracing_enum_map_fops = {
4517         .open           = tracing_enum_map_open,
4518         .read           = seq_read,
4519         .llseek         = seq_lseek,
4520         .release        = seq_release,
4521 };
4522
4523 static inline union trace_enum_map_item *
4524 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4525 {
4526         /* Return tail of array given the head */
4527         return ptr + ptr->head.length + 1;
4528 }
4529
4530 static void
4531 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4532                            int len)
4533 {
4534         struct trace_enum_map **stop;
4535         struct trace_enum_map **map;
4536         union trace_enum_map_item *map_array;
4537         union trace_enum_map_item *ptr;
4538
4539         stop = start + len;
4540
4541         /*
4542          * The trace_enum_maps contains the map plus a head and tail item,
4543          * where the head holds the module and length of array, and the
4544          * tail holds a pointer to the next list.
4545          */
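        /* One block: head, then the len maps, then a zeroed tail terminator. */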
4546         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4547         if (!map_array) {
4548                 pr_warn("Unable to allocate trace enum mapping\n");
4549                 return;
4550         }
4551
4552         mutex_lock(&trace_enum_mutex);
4553
4554         if (!trace_enum_maps)
4555                 trace_enum_maps = map_array;
4556         else {
4557                 ptr = trace_enum_maps;
4558                 for (;;) {
4559                         ptr = trace_enum_jmp_to_tail(ptr);
4560                         if (!ptr->tail.next)
4561                                 break;
4562                         ptr = ptr->tail.next;
4563
4564                 }
4565                 ptr->tail.next = map_array;
4566         }
4567         map_array->head.mod = mod;
4568         map_array->head.length = len;
4569         map_array++;
4570
4571         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4572                 map_array->map = **map;
4573                 map_array++;
4574         }
4575         memset(map_array, 0, sizeof(*map_array));
4576
4577         mutex_unlock(&trace_enum_mutex);
4578 }
4579
4580 static void trace_create_enum_file(struct dentry *d_tracer)
4581 {
4582         trace_create_file("enum_map", 0444, d_tracer,
4583                           NULL, &tracing_enum_map_fops);
4584 }
4585
4586 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4587 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4588 static inline void trace_insert_enum_map_file(struct module *mod,
4589                               struct trace_enum_map **start, int len) { }
4590 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4591
4592 static void trace_insert_enum_map(struct module *mod,
4593                                   struct trace_enum_map **start, int len)
4594 {
4595         struct trace_enum_map **map;
4596
4597         if (len <= 0)
4598                 return;
4599
4600         map = start;
4601
4602         trace_event_enum_update(map, len);
4603
4604         trace_insert_enum_map_file(mod, start, len);
4605 }
4606
4607 static ssize_t
4608 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4609                        size_t cnt, loff_t *ppos)
4610 {
4611         struct trace_array *tr = filp->private_data;
4612         char buf[MAX_TRACER_SIZE+2];
4613         int r;
4614
4615         mutex_lock(&trace_types_lock);
4616         r = sprintf(buf, "%s\n", tr->current_trace->name);
4617         mutex_unlock(&trace_types_lock);
4618
4619         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4620 }
4621
4622 int tracer_init(struct tracer *t, struct trace_array *tr)
4623 {
4624         tracing_reset_online_cpus(&tr->trace_buffer);
4625         return t->init(tr);
4626 }
4627
4628 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4629 {
4630         int cpu;
4631
4632         for_each_tracing_cpu(cpu)
4633                 per_cpu_ptr(buf->data, cpu)->entries = val;
4634 }
4635
4636 #ifdef CONFIG_TRACER_MAX_TRACE
4637 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4638 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4639                                         struct trace_buffer *size_buf, int cpu_id)
4640 {
4641         int cpu, ret = 0;
4642
4643         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4644                 for_each_tracing_cpu(cpu) {
4645                         ret = ring_buffer_resize(trace_buf->buffer,
4646                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4647                         if (ret < 0)
4648                                 break;
4649                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4650                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4651                 }
4652         } else {
4653                 ret = ring_buffer_resize(trace_buf->buffer,
4654                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4655                 if (ret == 0)
4656                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4657                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4658         }
4659
4660         return ret;
4661 }
4662 #endif /* CONFIG_TRACER_MAX_TRACE */
4663
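/*
 * Resize the ring buffer of @tr for @cpu (or all CPUs), and keep the
 * max/snapshot buffer in step when the current tracer uses it.  If
 * resizing the max buffer fails, the main buffer is put back to its
 * previous size.
 */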
4664 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4665                                         unsigned long size, int cpu)
4666 {
4667         int ret;
4668
4669         /*
4670          * If kernel or user changes the size of the ring buffer
4671          * we use the size that was given, and we can forget about
4672          * expanding it later.
4673          */
4674         ring_buffer_expanded = true;
4675
4676         /* May be called before buffers are initialized */
4677         if (!tr->trace_buffer.buffer)
4678                 return 0;
4679
4680         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4681         if (ret < 0)
4682                 return ret;
4683
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4686             !tr->current_trace->use_max_tr)
4687                 goto out;
4688
4689         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4690         if (ret < 0) {
4691                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4692                                                      &tr->trace_buffer, cpu);
4693                 if (r < 0) {
4694                         /*
4695                          * AARGH! We are left with a max buffer of a
4696                          * different size!
4697                          * The max buffer is our "snapshot" buffer.
4698                          * When a tracer needs a snapshot (one of the
4699                          * latency tracers), it swaps the max buffer
4700                          * with the saved snapshot. We succeeded in
4701                          * updating the size of the main buffer, but failed
4702                          * to update the size of the max buffer. And when we
4703                          * tried to reset the main buffer to its original
4704                          * size, we failed there too. This is very unlikely
4705                          * to happen, but if it does, warn and kill all
4706                          * tracing.
4707                          */
4708                         WARN_ON(1);
4709                         tracing_disabled = 1;
4710                 }
4711                 return ret;
4712         }
4713
4714         if (cpu == RING_BUFFER_ALL_CPUS)
4715                 set_buffer_entries(&tr->max_buffer, size);
4716         else
4717                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4718
4719  out:
4720 #endif /* CONFIG_TRACER_MAX_TRACE */
4721
4722         if (cpu == RING_BUFFER_ALL_CPUS)
4723                 set_buffer_entries(&tr->trace_buffer, size);
4724         else
4725                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4726
4727         return ret;
4728 }
4729
4730 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4731                                           unsigned long size, int cpu_id)
4732 {
4733         int ret = size;
4734
4735         mutex_lock(&trace_types_lock);
4736
4737         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4738                 /* make sure this cpu is enabled in the mask */
4739                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4740                         ret = -EINVAL;
4741                         goto out;
4742                 }
4743         }
4744
4745         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4746         if (ret < 0)
4747                 ret = -ENOMEM;
4748
4749 out:
4750         mutex_unlock(&trace_types_lock);
4751
4752         return ret;
4753 }
4754
4755
4756 /**
4757  * tracing_update_buffers - used by tracing facility to expand ring buffers
4758  *
4759  * To save memory when tracing is never used on a system that has it
4760  * configured in, the ring buffers are set to a minimum size. But once
4761  * a user starts to use the tracing facility, they need to grow
4762  * to their default size.
4763  *
4764  * This function is to be called when a tracer is about to be used.
4765  */
4766 int tracing_update_buffers(void)
4767 {
4768         int ret = 0;
4769
4770         mutex_lock(&trace_types_lock);
4771         if (!ring_buffer_expanded)
4772                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4773                                                 RING_BUFFER_ALL_CPUS);
4774         mutex_unlock(&trace_types_lock);
4775
4776         return ret;
4777 }
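
/*
 * Illustrative sketch (an assumption, not copied from an in-kernel
 * caller): a facility that is about to start generating trace data
 * would expand the buffers first and bail out if that fails; on
 * success the ring buffers are at their default (expanded) size.
 *
 *        int ret = tracing_update_buffers();
 *
 *        if (ret < 0)
 *                return ret;
 */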
4778
4779 struct trace_option_dentry;
4780
4781 static void
4782 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4783
4784 /*
4785  * Used to clear out the tracer before deletion of an instance.
4786  * Must have trace_types_lock held.
4787  */
4788 static void tracing_set_nop(struct trace_array *tr)
4789 {
4790         if (tr->current_trace == &nop_trace)
4791                 return;
4792
4793         tr->current_trace->enabled--;
4794
4795         if (tr->current_trace->reset)
4796                 tr->current_trace->reset(tr);
4797
4798         tr->current_trace = &nop_trace;
4799 }
4800
4801 static bool tracer_options_updated;
4802
4803 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4804 {
4805         /* Only enable if the directory has been created already. */
4806         if (!tr->dir)
4807                 return;
4808
4809         /* Only create trace option files after update_tracer_options finishes */
4810         if (!tracer_options_updated)
4811                 return;
4812
4813         create_trace_option_files(tr, t);
4814 }
4815
4816 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4817 {
4818         struct tracer *t;
4819 #ifdef CONFIG_TRACER_MAX_TRACE
4820         bool had_max_tr;
4821 #endif
4822         int ret = 0;
4823
4824         mutex_lock(&trace_types_lock);
4825
4826         if (!ring_buffer_expanded) {
4827                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4828                                                 RING_BUFFER_ALL_CPUS);
4829                 if (ret < 0)
4830                         goto out;
4831                 ret = 0;
4832         }
4833
4834         for (t = trace_types; t; t = t->next) {
4835                 if (strcmp(t->name, buf) == 0)
4836                         break;
4837         }
4838         if (!t) {
4839                 ret = -EINVAL;
4840                 goto out;
4841         }
4842         if (t == tr->current_trace)
4843                 goto out;
4844
4845         /* Some tracers are only allowed for the top level buffer */
4846         if (!trace_ok_for_array(t, tr)) {
4847                 ret = -EINVAL;
4848                 goto out;
4849         }
4850
4851         /* If trace pipe files are being read, we can't change the tracer */
4852         if (tr->current_trace->ref) {
4853                 ret = -EBUSY;
4854                 goto out;
4855         }
4856
4857         trace_branch_disable();
4858
4859         tr->current_trace->enabled--;
4860
4861         if (tr->current_trace->reset)
4862                 tr->current_trace->reset(tr);
4863
4864         /* Current trace needs to be nop_trace before synchronize_sched */
4865         tr->current_trace = &nop_trace;
4866
4867 #ifdef CONFIG_TRACER_MAX_TRACE
4868         had_max_tr = tr->allocated_snapshot;
4869
4870         if (had_max_tr && !t->use_max_tr) {
4871                 /*
4872                  * We need to make sure that the update_max_tr sees that
4873                  * current_trace changed to nop_trace to keep it from
4874                  * swapping the buffers after we resize it.
4875                  * The update_max_tr is called with interrupts disabled,
4876                  * so a synchronize_sched() is sufficient.
4877                  */
4878                 synchronize_sched();
4879                 free_snapshot(tr);
4880         }
4881 #endif
4882
4883 #ifdef CONFIG_TRACER_MAX_TRACE
4884         if (t->use_max_tr && !had_max_tr) {
4885                 ret = alloc_snapshot(tr);
4886                 if (ret < 0)
4887                         goto out;
4888         }
4889 #endif
4890
4891         if (t->init) {
4892                 ret = tracer_init(t, tr);
4893                 if (ret)
4894                         goto out;
4895         }
4896
4897         tr->current_trace = t;
4898         tr->current_trace->enabled++;
4899         trace_branch_enable(tr);
4900  out:
4901         mutex_unlock(&trace_types_lock);
4902
4903         return ret;
4904 }
4905
4906 static ssize_t
4907 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4908                         size_t cnt, loff_t *ppos)
4909 {
4910         struct trace_array *tr = filp->private_data;
4911         char buf[MAX_TRACER_SIZE+1];
4912         int i;
4913         size_t ret;
4914         int err;
4915
4916         ret = cnt;
4917
4918         if (cnt > MAX_TRACER_SIZE)
4919                 cnt = MAX_TRACER_SIZE;
4920
4921         if (copy_from_user(buf, ubuf, cnt))
4922                 return -EFAULT;
4923
4924         buf[cnt] = 0;
4925
4926         /* strip trailing whitespace. */
4927         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4928                 buf[i] = 0;
4929
4930         err = tracing_set_tracer(tr, buf);
4931         if (err)
4932                 return err;
4933
4934         *ppos += ret;
4935
4936         return ret;
4937 }
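
/*
 * Userspace sketch of the write path above (an illustration, not part
 * of the kernel).  Selecting a tracer is a plain write of its name into
 * the current_tracer file.  The tracefs mount point below is an
 * assumption; it is commonly /sys/kernel/tracing or
 * /sys/kernel/debug/tracing, and the "nop" tracer is always registered.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *name = "nop";
 *                int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                if (write(fd, name, strlen(name)) < 0)
 *                        return 1;
 *                close(fd);
 *                return 0;
 *        }
 */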
4938
4939 static ssize_t
4940 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4941                    size_t cnt, loff_t *ppos)
4942 {
4943         char buf[64];
4944         int r;
4945
4946         r = snprintf(buf, sizeof(buf), "%ld\n",
4947                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4948         if (r > sizeof(buf))
4949                 r = sizeof(buf);
4950         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4951 }
4952
4953 static ssize_t
4954 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4955                     size_t cnt, loff_t *ppos)
4956 {
4957         unsigned long val;
4958         int ret;
4959
4960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4961         if (ret)
4962                 return ret;
4963
4964         *ptr = val * 1000;
4965
4966         return cnt;
4967 }
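
/*
 * Unit note with a worked example (not from the original source): the
 * files backed by these two helpers are read and written in
 * microseconds, while the underlying variables are kept in
 * nanoseconds.  Writing the string "5000" stores 5000 * 1000 =
 * 5000000 ns, and reading the file back prints 5000 again via
 * nsecs_to_usecs(); a stored value of (unsigned long)-1 is shown
 * as -1.
 */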
4968
4969 static ssize_t
4970 tracing_thresh_read(struct file *filp, char __user *ubuf,
4971                     size_t cnt, loff_t *ppos)
4972 {
4973         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4974 }
4975
4976 static ssize_t
4977 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4978                      size_t cnt, loff_t *ppos)
4979 {
4980         struct trace_array *tr = filp->private_data;
4981         int ret;
4982
4983         mutex_lock(&trace_types_lock);
4984         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4985         if (ret < 0)
4986                 goto out;
4987
4988         if (tr->current_trace->update_thresh) {
4989                 ret = tr->current_trace->update_thresh(tr);
4990                 if (ret < 0)
4991                         goto out;
4992         }
4993
4994         ret = cnt;
4995 out:
4996         mutex_unlock(&trace_types_lock);
4997
4998         return ret;
4999 }
5000
5001 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5002
5003 static ssize_t
5004 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5005                      size_t cnt, loff_t *ppos)
5006 {
5007         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5008 }
5009
5010 static ssize_t
5011 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5012                       size_t cnt, loff_t *ppos)
5013 {
5014         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5015 }
5016
5017 #endif
5018
5019 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         struct trace_iterator *iter;
5023         int ret = 0;
5024
5025         if (tracing_disabled)
5026                 return -ENODEV;
5027
5028         if (trace_array_get(tr) < 0)
5029                 return -ENODEV;
5030
5031         mutex_lock(&trace_types_lock);
5032
5033         /* create a buffer to store the information to pass to userspace */
5034         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5035         if (!iter) {
5036                 ret = -ENOMEM;
5037                 __trace_array_put(tr);
5038                 goto out;
5039         }
5040
5041         trace_seq_init(&iter->seq);
5042         iter->trace = tr->current_trace;
5043
5044         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5045                 ret = -ENOMEM;
5046                 goto fail;
5047         }
5048
5049         /* trace pipe does not show start of buffer */
5050         cpumask_setall(iter->started);
5051
5052         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5053                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5054
5055         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5056         if (trace_clocks[tr->clock_id].in_ns)
5057                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5058
5059         iter->tr = tr;
5060         iter->trace_buffer = &tr->trace_buffer;
5061         iter->cpu_file = tracing_get_cpu(inode);
5062         mutex_init(&iter->mutex);
5063         filp->private_data = iter;
5064
5065         if (iter->trace->pipe_open)
5066                 iter->trace->pipe_open(iter);
5067
5068         nonseekable_open(inode, filp);
5069
5070         tr->current_trace->ref++;
5071 out:
5072         mutex_unlock(&trace_types_lock);
5073         return ret;
5074
5075 fail:
5076         kfree(iter);
5077         __trace_array_put(tr);
5078         mutex_unlock(&trace_types_lock);
5079         return ret;
5080 }
5081
5082 static int tracing_release_pipe(struct inode *inode, struct file *file)
5083 {
5084         struct trace_iterator *iter = file->private_data;
5085         struct trace_array *tr = inode->i_private;
5086
5087         mutex_lock(&trace_types_lock);
5088
5089         tr->current_trace->ref--;
5090
5091         if (iter->trace->pipe_close)
5092                 iter->trace->pipe_close(iter);
5093
5094         mutex_unlock(&trace_types_lock);
5095
5096         free_cpumask_var(iter->started);
5097         mutex_destroy(&iter->mutex);
5098         kfree(iter);
5099
5100         trace_array_put(tr);
5101
5102         return 0;
5103 }
5104
5105 static unsigned int
5106 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5107 {
5108         struct trace_array *tr = iter->tr;
5109
5110         /* Iterators are static, they should be filled or empty */
5111         if (trace_buffer_iter(iter, iter->cpu_file))
5112                 return POLLIN | POLLRDNORM;
5113
5114         if (tr->trace_flags & TRACE_ITER_BLOCK)
5115                 /*
5116                  * Always select as readable when in blocking mode
5117                  */
5118                 return POLLIN | POLLRDNORM;
5119         else
5120                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5121                                              filp, poll_table);
5122 }
5123
5124 static unsigned int
5125 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5126 {
5127         struct trace_iterator *iter = filp->private_data;
5128
5129         return trace_poll(iter, filp, poll_table);
5130 }
5131
5132 /* Must be called with iter->mutex held. */
5133 static int tracing_wait_pipe(struct file *filp)
5134 {
5135         struct trace_iterator *iter = filp->private_data;
5136         int ret;
5137
5138         while (trace_empty(iter)) {
5139
5140                 if ((filp->f_flags & O_NONBLOCK)) {
5141                         return -EAGAIN;
5142                 }
5143
5144                 /*
5145                  * We give an EOF only once we have read something and
5146                  * tracing has been disabled. If tracing is disabled but
5147                  * we have never read anything, we keep blocking. This
5148                  * allows a user to cat this file, then enable tracing,
5149                  * and get an EOF only when tracing is disabled again.
5150                  *
5151                  * iter->pos will be 0 if we haven't read anything.
5152                  */
5153                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5154                         break;
5155
5156                 mutex_unlock(&iter->mutex);
5157
5158                 ret = wait_on_pipe(iter, false);
5159
5160                 mutex_lock(&iter->mutex);
5161
5162                 if (ret)
5163                         return ret;
5164         }
5165
5166         return 1;
5167 }
5168
5169 /*
5170  * Consumer reader.
5171  */
5172 static ssize_t
5173 tracing_read_pipe(struct file *filp, char __user *ubuf,
5174                   size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_iterator *iter = filp->private_data;
5177         ssize_t sret;
5178
5179         /*
5180          * Avoid more than one consumer on a single file descriptor.
5181          * This is just a matter of trace coherency; the ring buffer itself
5182          * is protected.
5183          */
5184         mutex_lock(&iter->mutex);
5185
5186         /* return any leftover data */
5187         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5188         if (sret != -EBUSY)
5189                 goto out;
5190
5191         trace_seq_init(&iter->seq);
5192
5193         if (iter->trace->read) {
5194                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5195                 if (sret)
5196                         goto out;
5197         }
5198
5199 waitagain:
5200         sret = tracing_wait_pipe(filp);
5201         if (sret <= 0)
5202                 goto out;
5203
5204         /* stop when tracing is finished */
5205         if (trace_empty(iter)) {
5206                 sret = 0;
5207                 goto out;
5208         }
5209
5210         if (cnt >= PAGE_SIZE)
5211                 cnt = PAGE_SIZE - 1;
5212
5213         /* reset all but tr, trace, and overruns */
5214         memset(&iter->seq, 0,
5215                sizeof(struct trace_iterator) -
5216                offsetof(struct trace_iterator, seq));
5217         cpumask_clear(iter->started);
5218         trace_seq_init(&iter->seq);
5219         iter->pos = -1;
5220
5221         trace_event_read_lock();
5222         trace_access_lock(iter->cpu_file);
5223         while (trace_find_next_entry_inc(iter) != NULL) {
5224                 enum print_line_t ret;
5225                 int save_len = iter->seq.seq.len;
5226
5227                 ret = print_trace_line(iter);
5228                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5229                         /* don't print partial lines */
5230                         iter->seq.seq.len = save_len;
5231                         break;
5232                 }
5233                 if (ret != TRACE_TYPE_NO_CONSUME)
5234                         trace_consume(iter);
5235
5236                 if (trace_seq_used(&iter->seq) >= cnt)
5237                         break;
5238
5239                 /*
5240                  * Setting the full flag means we reached the trace_seq buffer
5241                  * size and we should have left via the partial output condition
5242                  * above. One of the trace_seq_* functions is not being used properly.
5243                  */
5244                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5245                           iter->ent->type);
5246         }
5247         trace_access_unlock(iter->cpu_file);
5248         trace_event_read_unlock();
5249
5250         /* Now copy what we have to the user */
5251         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5252         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5253                 trace_seq_init(&iter->seq);
5254
5255         /*
5256          * If there was nothing to send to user, in spite of consuming trace
5257          * entries, go back to wait for more entries.
5258          */
5259         if (sret == -EBUSY)
5260                 goto waitagain;
5261
5262 out:
5263         mutex_unlock(&iter->mutex);
5264
5265         return sret;
5266 }
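
/*
 * Userspace sketch of a trace_pipe consumer (an illustration, not part
 * of the kernel).  Reads on trace_pipe consume entries and block while
 * the buffer is empty, so a simple poll()+read() loop is enough to
 * stream the trace until the reader is interrupted.  The tracefs path
 * is an assumption.
 *
 *        #include <fcntl.h>
 *        #include <poll.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                char buf[4096];
 *                ssize_t n;
 *                struct pollfd pfd = {
 *                        .fd     = open("/sys/kernel/tracing/trace_pipe", O_RDONLY),
 *                        .events = POLLIN,
 *                };
 *
 *                if (pfd.fd < 0)
 *                        return 1;
 *                while (poll(&pfd, 1, -1) > 0) {
 *                        n = read(pfd.fd, buf, sizeof(buf));
 *                        if (n <= 0)
 *                                break;
 *                        write(STDOUT_FILENO, buf, n);
 *                }
 *                close(pfd.fd);
 *                return 0;
 *        }
 */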
5267
5268 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5269                                      unsigned int idx)
5270 {
5271         __free_page(spd->pages[idx]);
5272 }
5273
5274 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5275         .can_merge              = 0,
5276         .confirm                = generic_pipe_buf_confirm,
5277         .release                = generic_pipe_buf_release,
5278         .steal                  = generic_pipe_buf_steal,
5279         .get                    = generic_pipe_buf_get,
5280 };
5281
5282 static size_t
5283 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5284 {
5285         size_t count;
5286         int save_len;
5287         int ret;
5288
5289         /* Seq buffer is page-sized, exactly what we need. */
5290         for (;;) {
5291                 save_len = iter->seq.seq.len;
5292                 ret = print_trace_line(iter);
5293
5294                 if (trace_seq_has_overflowed(&iter->seq)) {
5295                         iter->seq.seq.len = save_len;
5296                         break;
5297                 }
5298
5299                 /*
5300                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5301                  * should only be returned when iter->seq overflowed, which
5302                  * was handled above. But check it anyway to be safe.
5303                  */
5304                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5305                         iter->seq.seq.len = save_len;
5306                         break;
5307                 }
5308
5309                 count = trace_seq_used(&iter->seq) - save_len;
5310                 if (rem < count) {
5311                         rem = 0;
5312                         iter->seq.seq.len = save_len;
5313                         break;
5314                 }
5315
5316                 if (ret != TRACE_TYPE_NO_CONSUME)
5317                         trace_consume(iter);
5318                 rem -= count;
5319                 if (!trace_find_next_entry_inc(iter))   {
5320                         rem = 0;
5321                         iter->ent = NULL;
5322                         break;
5323                 }
5324         }
5325
5326         return rem;
5327 }
5328
5329 static ssize_t tracing_splice_read_pipe(struct file *filp,
5330                                         loff_t *ppos,
5331                                         struct pipe_inode_info *pipe,
5332                                         size_t len,
5333                                         unsigned int flags)
5334 {
5335         struct page *pages_def[PIPE_DEF_BUFFERS];
5336         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5337         struct trace_iterator *iter = filp->private_data;
5338         struct splice_pipe_desc spd = {
5339                 .pages          = pages_def,
5340                 .partial        = partial_def,
5341                 .nr_pages       = 0, /* This gets updated below. */
5342                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5343                 .flags          = flags,
5344                 .ops            = &tracing_pipe_buf_ops,
5345                 .spd_release    = tracing_spd_release_pipe,
5346         };
5347         ssize_t ret;
5348         size_t rem;
5349         unsigned int i;
5350
5351         if (splice_grow_spd(pipe, &spd))
5352                 return -ENOMEM;
5353
5354         mutex_lock(&iter->mutex);
5355
5356         if (iter->trace->splice_read) {
5357                 ret = iter->trace->splice_read(iter, filp,
5358                                                ppos, pipe, len, flags);
5359                 if (ret)
5360                         goto out_err;
5361         }
5362
5363         ret = tracing_wait_pipe(filp);
5364         if (ret <= 0)
5365                 goto out_err;
5366
5367         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5368                 ret = -EFAULT;
5369                 goto out_err;
5370         }
5371
5372         trace_event_read_lock();
5373         trace_access_lock(iter->cpu_file);
5374
5375         /* Fill as many pages as possible. */
5376         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5377                 spd.pages[i] = alloc_page(GFP_KERNEL);
5378                 if (!spd.pages[i])
5379                         break;
5380
5381                 rem = tracing_fill_pipe_page(rem, iter);
5382
5383                 /* Copy the data into the page, so we can start over. */
5384                 ret = trace_seq_to_buffer(&iter->seq,
5385                                           page_address(spd.pages[i]),
5386                                           trace_seq_used(&iter->seq));
5387                 if (ret < 0) {
5388                         __free_page(spd.pages[i]);
5389                         break;
5390                 }
5391                 spd.partial[i].offset = 0;
5392                 spd.partial[i].len = trace_seq_used(&iter->seq);
5393
5394                 trace_seq_init(&iter->seq);
5395         }
5396
5397         trace_access_unlock(iter->cpu_file);
5398         trace_event_read_unlock();
5399         mutex_unlock(&iter->mutex);
5400
5401         spd.nr_pages = i;
5402
5403         if (i)
5404                 ret = splice_to_pipe(pipe, &spd);
5405         else
5406                 ret = 0;
5407 out:
5408         splice_shrink_spd(&spd);
5409         return ret;
5410
5411 out_err:
5412         mutex_unlock(&iter->mutex);
5413         goto out;
5414 }
5415
5416 static ssize_t
5417 tracing_entries_read(struct file *filp, char __user *ubuf,
5418                      size_t cnt, loff_t *ppos)
5419 {
5420         struct inode *inode = file_inode(filp);
5421         struct trace_array *tr = inode->i_private;
5422         int cpu = tracing_get_cpu(inode);
5423         char buf[64];
5424         int r = 0;
5425         ssize_t ret;
5426
5427         mutex_lock(&trace_types_lock);
5428
5429         if (cpu == RING_BUFFER_ALL_CPUS) {
5430                 int cpu, buf_size_same;
5431                 unsigned long size;
5432
5433                 size = 0;
5434                 buf_size_same = 1;
5435                 /* check if all cpu sizes are the same */
5436                 for_each_tracing_cpu(cpu) {
5437                         /* fill in the size from first enabled cpu */
5438                         if (size == 0)
5439                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5440                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5441                                 buf_size_same = 0;
5442                                 break;
5443                         }
5444                 }
5445
5446                 if (buf_size_same) {
5447                         if (!ring_buffer_expanded)
5448                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5449                                             size >> 10,
5450                                             trace_buf_size >> 10);
5451                         else
5452                                 r = sprintf(buf, "%lu\n", size >> 10);
5453                 } else
5454                         r = sprintf(buf, "X\n");
5455         } else
5456                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5457
5458         mutex_unlock(&trace_types_lock);
5459
5460         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5461         return ret;
5462 }
5463
5464 static ssize_t
5465 tracing_entries_write(struct file *filp, const char __user *ubuf,
5466                       size_t cnt, loff_t *ppos)
5467 {
5468         struct inode *inode = file_inode(filp);
5469         struct trace_array *tr = inode->i_private;
5470         unsigned long val;
5471         int ret;
5472
5473         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5474         if (ret)
5475                 return ret;
5476
5477         /* must have at least 1 entry */
5478         if (!val)
5479                 return -EINVAL;
5480
5481         /* value is in KB */
5482         val <<= 10;
5483         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5484         if (ret < 0)
5485                 return ret;
5486
5487         *ppos += cnt;
5488
5489         return cnt;
5490 }
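
/*
 * Userspace sketch for the buffer_size_kb write handler above (an
 * illustration, not part of the kernel).  The written value is in KiB
 * per CPU, so writing "4096" requests 4 MiB per-cpu buffers; a write to
 * per_cpu/cpuN/buffer_size_kb resizes only that CPU.  The tracefs path
 * is an assumption.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                if (write(fd, "4096", 4) != 4)
 *                        return 1;
 *                close(fd);
 *                return 0;
 *        }
 */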
5491
5492 static ssize_t
5493 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5494                                 size_t cnt, loff_t *ppos)
5495 {
5496         struct trace_array *tr = filp->private_data;
5497         char buf[64];
5498         int r, cpu;
5499         unsigned long size = 0, expanded_size = 0;
5500
5501         mutex_lock(&trace_types_lock);
5502         for_each_tracing_cpu(cpu) {
5503                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5504                 if (!ring_buffer_expanded)
5505                         expanded_size += trace_buf_size >> 10;
5506         }
5507         if (ring_buffer_expanded)
5508                 r = sprintf(buf, "%lu\n", size);
5509         else
5510                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5511         mutex_unlock(&trace_types_lock);
5512
5513         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5514 }
5515
5516 static ssize_t
5517 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5518                           size_t cnt, loff_t *ppos)
5519 {
5520         /*
5521          * There is no need to read what the user has written; this function
5522          * is just to make sure that there is no error when "echo" is used
5523          */
5524
5525         *ppos += cnt;
5526
5527         return cnt;
5528 }
5529
5530 static int
5531 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5532 {
5533         struct trace_array *tr = inode->i_private;
5534
5535         /* disable tracing? */
5536         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5537                 tracer_tracing_off(tr);
5538         /* resize the ring buffer to 0 */
5539         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5540
5541         trace_array_put(tr);
5542
5543         return 0;
5544 }
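
/*
 * Usage sketch (an illustration, not part of the kernel): the
 * free_buffer file acts on release, not on the write itself.  A
 * minimal user opens it, writes anything and closes it; the close is
 * what optionally stops tracing (TRACE_ITER_STOP_ON_FREE) and shrinks
 * the ring buffer to zero.  The tracefs path is an assumption.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "1", 1);
 *                close(fd);
 *                return 0;
 *        }
 */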
5545
5546 static ssize_t
5547 tracing_mark_write(struct file *filp, const char __user *ubuf,
5548                                         size_t cnt, loff_t *fpos)
5549 {
5550         unsigned long addr = (unsigned long)ubuf;
5551         struct trace_array *tr = filp->private_data;
5552         struct ring_buffer_event *event;
5553         struct ring_buffer *buffer;
5554         struct print_entry *entry;
5555         unsigned long irq_flags;
5556         struct page *pages[2];
5557         void *map_page[2];
5558         int nr_pages = 1;
5559         ssize_t written;
5560         int offset;
5561         int size;
5562         int len;
5563         int ret;
5564         int i;
5565
5566         if (tracing_disabled)
5567                 return -EINVAL;
5568
5569         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5570                 return -EINVAL;
5571
5572         if (cnt > TRACE_BUF_SIZE)
5573                 cnt = TRACE_BUF_SIZE;
5574
5575         /*
5576          * Userspace is injecting traces into the kernel trace buffer.
5577          * We want to be as non-intrusive as possible.
5578          * To do so, we do not want to allocate any special buffers
5579          * or take any locks, but instead write the userspace data
5580          * straight into the ring buffer.
5581          *
5582          * First we need to pin the userspace buffer into memory.
5583          * Most likely it already is, because userspace just referenced it,
5584          * but there's no guarantee that it is. By using get_user_pages_fast()
5585          * and kmap_atomic/kunmap_atomic() we can get access to the
5586          * pages directly. We then write the data directly into the
5587          * ring buffer.
5588          */
5589         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5590
5591         /* check if we cross pages */
5592         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5593                 nr_pages = 2;
5594
5595         offset = addr & (PAGE_SIZE - 1);
5596         addr &= PAGE_MASK;
5597
5598         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5599         if (ret < nr_pages) {
5600                 while (--ret >= 0)
5601                         put_page(pages[ret]);
5602                 written = -EFAULT;
5603                 goto out;
5604         }
5605
5606         for (i = 0; i < nr_pages; i++)
5607                 map_page[i] = kmap_atomic(pages[i]);
5608
5609         local_save_flags(irq_flags);
5610         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5611         buffer = tr->trace_buffer.buffer;
5612         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5613                                           irq_flags, preempt_count());
5614         if (!event) {
5615                 /* Ring buffer disabled, return as if not open for write */
5616                 written = -EBADF;
5617                 goto out_unlock;
5618         }
5619
5620         entry = ring_buffer_event_data(event);
5621         entry->ip = _THIS_IP_;
5622
5623         if (nr_pages == 2) {
5624                 len = PAGE_SIZE - offset;
5625                 memcpy(&entry->buf, map_page[0] + offset, len);
5626                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5627         } else
5628                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5629
5630         if (entry->buf[cnt - 1] != '\n') {
5631                 entry->buf[cnt] = '\n';
5632                 entry->buf[cnt + 1] = '\0';
5633         } else
5634                 entry->buf[cnt] = '\0';
5635
5636         __buffer_unlock_commit(buffer, event);
5637
5638         written = cnt;
5639
5640         *fpos += written;
5641
5642  out_unlock:
5643         for (i = nr_pages - 1; i >= 0; i--) {
5644                 kunmap_atomic(map_page[i]);
5645                 put_page(pages[i]);
5646         }
5647  out:
5648         return written;
5649 }
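
/*
 * Userspace counterpart of tracing_mark_write() (an illustrative
 * sketch, not part of the kernel): annotating the trace is a plain
 * write to the trace_marker file, which lands in the ring buffer as a
 * print entry alongside the kernel's own events.  The tracefs path is
 * an assumption.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *msg = "hello from userspace\n";
 *                int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, msg, strlen(msg));
 *                close(fd);
 *                return 0;
 *        }
 */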
5650
5651 static int tracing_clock_show(struct seq_file *m, void *v)
5652 {
5653         struct trace_array *tr = m->private;
5654         int i;
5655
5656         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5657                 seq_printf(m,
5658                         "%s%s%s%s", i ? " " : "",
5659                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5660                         i == tr->clock_id ? "]" : "");
5661         seq_putc(m, '\n');
5662
5663         return 0;
5664 }
5665
5666 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5667 {
5668         int i;
5669
5670         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5671                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5672                         break;
5673         }
5674         if (i == ARRAY_SIZE(trace_clocks))
5675                 return -EINVAL;
5676
5677         mutex_lock(&trace_types_lock);
5678
5679         tr->clock_id = i;
5680
5681         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5682
5683         /*
5684          * New clock may not be consistent with the previous clock.
5685          * Reset the buffer so that it doesn't have incomparable timestamps.
5686          */
5687         tracing_reset_online_cpus(&tr->trace_buffer);
5688
5689 #ifdef CONFIG_TRACER_MAX_TRACE
5690         if (tr->max_buffer.buffer)
5691                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5692         tracing_reset_online_cpus(&tr->max_buffer);
5693 #endif
5694
5695         mutex_unlock(&trace_types_lock);
5696
5697         return 0;
5698 }
5699
5700 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5701                                    size_t cnt, loff_t *fpos)
5702 {
5703         struct seq_file *m = filp->private_data;
5704         struct trace_array *tr = m->private;
5705         char buf[64];
5706         const char *clockstr;
5707         int ret;
5708
5709         if (cnt >= sizeof(buf))
5710                 return -EINVAL;
5711
5712         if (copy_from_user(buf, ubuf, cnt))
5713                 return -EFAULT;
5714
5715         buf[cnt] = 0;
5716
5717         clockstr = strstrip(buf);
5718
5719         ret = tracing_set_clock(tr, clockstr);
5720         if (ret)
5721                 return ret;
5722
5723         *fpos += cnt;
5724
5725         return cnt;
5726 }
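
/*
 * Userspace sketch for the trace_clock write handler above (an
 * illustration, not part of the kernel).  Writing one of the names
 * listed by tracing_clock_show() selects the new clock and, as noted in
 * tracing_set_clock(), resets the buffers so old and new timestamps are
 * never mixed.  The path and the "mono" clock name are assumptions
 * about a typical configuration.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                if (write(fd, "mono", 4) != 4)
 *                        return 1;
 *                close(fd);
 *                return 0;
 *        }
 */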
5727
5728 static int tracing_clock_open(struct inode *inode, struct file *file)
5729 {
5730         struct trace_array *tr = inode->i_private;
5731         int ret;
5732
5733         if (tracing_disabled)
5734                 return -ENODEV;
5735
5736         if (trace_array_get(tr))
5737                 return -ENODEV;
5738
5739         ret = single_open(file, tracing_clock_show, inode->i_private);
5740         if (ret < 0)
5741                 trace_array_put(tr);
5742
5743         return ret;
5744 }
5745
5746 struct ftrace_buffer_info {
5747         struct trace_iterator   iter;
5748         void                    *spare;
5749         unsigned int            read;
5750 };
5751
5752 #ifdef CONFIG_TRACER_SNAPSHOT
5753 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5754 {
5755         struct trace_array *tr = inode->i_private;
5756         struct trace_iterator *iter;
5757         struct seq_file *m;
5758         int ret = 0;
5759
5760         if (trace_array_get(tr) < 0)
5761                 return -ENODEV;
5762
5763         if (file->f_mode & FMODE_READ) {
5764                 iter = __tracing_open(inode, file, true);
5765                 if (IS_ERR(iter))
5766                         ret = PTR_ERR(iter);
5767         } else {
5768                 /* Writes still need the seq_file to hold the private data */
5769                 ret = -ENOMEM;
5770                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5771                 if (!m)
5772                         goto out;
5773                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5774                 if (!iter) {
5775                         kfree(m);
5776                         goto out;
5777                 }
5778                 ret = 0;
5779
5780                 iter->tr = tr;
5781                 iter->trace_buffer = &tr->max_buffer;
5782                 iter->cpu_file = tracing_get_cpu(inode);
5783                 m->private = iter;
5784                 file->private_data = m;
5785         }
5786 out:
5787         if (ret < 0)
5788                 trace_array_put(tr);
5789
5790         return ret;
5791 }
5792
5793 static ssize_t
5794 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5795                        loff_t *ppos)
5796 {
5797         struct seq_file *m = filp->private_data;
5798         struct trace_iterator *iter = m->private;
5799         struct trace_array *tr = iter->tr;
5800         unsigned long val;
5801         int ret;
5802
5803         ret = tracing_update_buffers();
5804         if (ret < 0)
5805                 return ret;
5806
5807         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5808         if (ret)
5809                 return ret;
5810
5811         mutex_lock(&trace_types_lock);
5812
5813         if (tr->current_trace->use_max_tr) {
5814                 ret = -EBUSY;
5815                 goto out;
5816         }
5817
5818         switch (val) {
5819         case 0:
5820                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5821                         ret = -EINVAL;
5822                         break;
5823                 }
5824                 if (tr->allocated_snapshot)
5825                         free_snapshot(tr);
5826                 break;
5827         case 1:
5828 /* Only allow per-cpu swap if the ring buffer supports it */
5829 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5830                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5831                         ret = -EINVAL;
5832                         break;
5833                 }
5834 #endif
5835                 if (!tr->allocated_snapshot)
5836                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5837                                 &tr->trace_buffer, iter->cpu_file);
5838                 else
5839                         ret = alloc_snapshot(tr);
5840
5841                 if (ret < 0)
5842                         break;
5843
5844                 local_irq_disable();
5845                 /* Now, we're going to swap */
5846                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5847                         update_max_tr(tr, current, smp_processor_id());
5848                 else
5849                         update_max_tr_single(tr, current, iter->cpu_file);
5850                 local_irq_enable();
5851                 break;
5852         default:
5853                 if (tr->allocated_snapshot) {
5854                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5855                                 tracing_reset_online_cpus(&tr->max_buffer);
5856                         else
5857                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5858                 }
5859                 break;
5860         }
5861
5862         if (ret >= 0) {
5863                 *ppos += cnt;
5864                 ret = cnt;
5865         }
5866 out:
5867         mutex_unlock(&trace_types_lock);
5868         return ret;
5869 }
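
/*
 * Userspace sketch of the snapshot control above (an illustration, not
 * part of the kernel).  Writing '1' allocates the max buffer if needed
 * and swaps the live buffer into it, writing '0' frees it again, and
 * any other value just clears the snapshot buffer.  The captured data
 * can then be read back from the same file.  The tracefs path is an
 * assumption.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "1", 1);
 *                close(fd);
 *                return 0;
 *        }
 */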
5870
5871 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5872 {
5873         struct seq_file *m = file->private_data;
5874         int ret;
5875
5876         ret = tracing_release(inode, file);
5877
5878         if (file->f_mode & FMODE_READ)
5879                 return ret;
5880
5881         /* If write only, the seq_file is just a stub */
5882         if (m)
5883                 kfree(m->private);
5884         kfree(m);
5885
5886         return 0;
5887 }
5888
5889 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5890 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5891                                     size_t count, loff_t *ppos);
5892 static int tracing_buffers_release(struct inode *inode, struct file *file);
5893 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5894                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5895
5896 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5897 {
5898         struct ftrace_buffer_info *info;
5899         int ret;
5900
5901         ret = tracing_buffers_open(inode, filp);
5902         if (ret < 0)
5903                 return ret;
5904
5905         info = filp->private_data;
5906
5907         if (info->iter.trace->use_max_tr) {
5908                 tracing_buffers_release(inode, filp);
5909                 return -EBUSY;
5910         }
5911
5912         info->iter.snapshot = true;
5913         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5914
5915         return ret;
5916 }
5917
5918 #endif /* CONFIG_TRACER_SNAPSHOT */
5919
5920
5921 static const struct file_operations tracing_thresh_fops = {
5922         .open           = tracing_open_generic,
5923         .read           = tracing_thresh_read,
5924         .write          = tracing_thresh_write,
5925         .llseek         = generic_file_llseek,
5926 };
5927
5928 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5929 static const struct file_operations tracing_max_lat_fops = {
5930         .open           = tracing_open_generic,
5931         .read           = tracing_max_lat_read,
5932         .write          = tracing_max_lat_write,
5933         .llseek         = generic_file_llseek,
5934 };
5935 #endif
5936
5937 static const struct file_operations set_tracer_fops = {
5938         .open           = tracing_open_generic,
5939         .read           = tracing_set_trace_read,
5940         .write          = tracing_set_trace_write,
5941         .llseek         = generic_file_llseek,
5942 };
5943
5944 static const struct file_operations tracing_pipe_fops = {
5945         .open           = tracing_open_pipe,
5946         .poll           = tracing_poll_pipe,
5947         .read           = tracing_read_pipe,
5948         .splice_read    = tracing_splice_read_pipe,
5949         .release        = tracing_release_pipe,
5950         .llseek         = no_llseek,
5951 };
5952
5953 static const struct file_operations tracing_entries_fops = {
5954         .open           = tracing_open_generic_tr,
5955         .read           = tracing_entries_read,
5956         .write          = tracing_entries_write,
5957         .llseek         = generic_file_llseek,
5958         .release        = tracing_release_generic_tr,
5959 };
5960
5961 static const struct file_operations tracing_total_entries_fops = {
5962         .open           = tracing_open_generic_tr,
5963         .read           = tracing_total_entries_read,
5964         .llseek         = generic_file_llseek,
5965         .release        = tracing_release_generic_tr,
5966 };
5967
5968 static const struct file_operations tracing_free_buffer_fops = {
5969         .open           = tracing_open_generic_tr,
5970         .write          = tracing_free_buffer_write,
5971         .release        = tracing_free_buffer_release,
5972 };
5973
5974 static const struct file_operations tracing_mark_fops = {
5975         .open           = tracing_open_generic_tr,
5976         .write          = tracing_mark_write,
5977         .llseek         = generic_file_llseek,
5978         .release        = tracing_release_generic_tr,
5979 };
5980
5981 static const struct file_operations trace_clock_fops = {
5982         .open           = tracing_clock_open,
5983         .read           = seq_read,
5984         .llseek         = seq_lseek,
5985         .release        = tracing_single_release_tr,
5986         .write          = tracing_clock_write,
5987 };
5988
5989 #ifdef CONFIG_TRACER_SNAPSHOT
5990 static const struct file_operations snapshot_fops = {
5991         .open           = tracing_snapshot_open,
5992         .read           = seq_read,
5993         .write          = tracing_snapshot_write,
5994         .llseek         = tracing_lseek,
5995         .release        = tracing_snapshot_release,
5996 };
5997
5998 static const struct file_operations snapshot_raw_fops = {
5999         .open           = snapshot_raw_open,
6000         .read           = tracing_buffers_read,
6001         .release        = tracing_buffers_release,
6002         .splice_read    = tracing_buffers_splice_read,
6003         .llseek         = no_llseek,
6004 };
6005
6006 #endif /* CONFIG_TRACER_SNAPSHOT */
6007
6008 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6009 {
6010         struct trace_array *tr = inode->i_private;
6011         struct ftrace_buffer_info *info;
6012         int ret;
6013
6014         if (tracing_disabled)
6015                 return -ENODEV;
6016
6017         if (trace_array_get(tr) < 0)
6018                 return -ENODEV;
6019
6020         info = kzalloc(sizeof(*info), GFP_KERNEL);
6021         if (!info) {
6022                 trace_array_put(tr);
6023                 return -ENOMEM;
6024         }
6025
6026         mutex_lock(&trace_types_lock);
6027
6028         info->iter.tr           = tr;
6029         info->iter.cpu_file     = tracing_get_cpu(inode);
6030         info->iter.trace        = tr->current_trace;
6031         info->iter.trace_buffer = &tr->trace_buffer;
6032         info->spare             = NULL;
6033         /* Force reading ring buffer for first read */
6034         info->read              = (unsigned int)-1;
6035
6036         filp->private_data = info;
6037
6038         tr->current_trace->ref++;
6039
6040         mutex_unlock(&trace_types_lock);
6041
6042         ret = nonseekable_open(inode, filp);
6043         if (ret < 0)
6044                 trace_array_put(tr);
6045
6046         return ret;
6047 }
6048
6049 static unsigned int
6050 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6051 {
6052         struct ftrace_buffer_info *info = filp->private_data;
6053         struct trace_iterator *iter = &info->iter;
6054
6055         return trace_poll(iter, filp, poll_table);
6056 }
6057
6058 static ssize_t
6059 tracing_buffers_read(struct file *filp, char __user *ubuf,
6060                      size_t count, loff_t *ppos)
6061 {
6062         struct ftrace_buffer_info *info = filp->private_data;
6063         struct trace_iterator *iter = &info->iter;
6064         ssize_t ret;
6065         ssize_t size;
6066
6067         if (!count)
6068                 return 0;
6069
6070 #ifdef CONFIG_TRACER_MAX_TRACE
6071         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6072                 return -EBUSY;
6073 #endif
6074
6075         if (!info->spare)
6076                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6077                                                           iter->cpu_file);
6078         if (!info->spare)
6079                 return -ENOMEM;
6080
6081         /* Do we have previous read data to read? */
6082         if (info->read < PAGE_SIZE)
6083                 goto read;
6084
6085  again:
6086         trace_access_lock(iter->cpu_file);
6087         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6088                                     &info->spare,
6089                                     count,
6090                                     iter->cpu_file, 0);
6091         trace_access_unlock(iter->cpu_file);
6092
6093         if (ret < 0) {
6094                 if (trace_empty(iter)) {
6095                         if ((filp->f_flags & O_NONBLOCK))
6096                                 return -EAGAIN;
6097
6098                         ret = wait_on_pipe(iter, false);
6099                         if (ret)
6100                                 return ret;
6101
6102                         goto again;
6103                 }
6104                 return 0;
6105         }
6106
6107         info->read = 0;
6108  read:
6109         size = PAGE_SIZE - info->read;
6110         if (size > count)
6111                 size = count;
6112
6113         ret = copy_to_user(ubuf, info->spare + info->read, size);
6114         if (ret == size)
6115                 return -EFAULT;
6116
6117         size -= ret;
6118
6119         *ppos += size;
6120         info->read += size;
6121
6122         return size;
6123 }
6124
6125 static int tracing_buffers_release(struct inode *inode, struct file *file)
6126 {
6127         struct ftrace_buffer_info *info = file->private_data;
6128         struct trace_iterator *iter = &info->iter;
6129
6130         mutex_lock(&trace_types_lock);
6131
6132         iter->tr->current_trace->ref--;
6133
6134         __trace_array_put(iter->tr);
6135
6136         if (info->spare)
6137                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6138         kfree(info);
6139
6140         mutex_unlock(&trace_types_lock);
6141
6142         return 0;
6143 }
6144
6145 struct buffer_ref {
6146         struct ring_buffer      *buffer;
6147         void                    *page;
6148         int                     ref;
6149 };
6150
6151 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6152                                     struct pipe_buffer *buf)
6153 {
6154         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6155
6156         if (--ref->ref)
6157                 return;
6158
6159         ring_buffer_free_read_page(ref->buffer, ref->page);
6160         kfree(ref);
6161         buf->private = 0;
6162 }
6163
6164 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6165                                 struct pipe_buffer *buf)
6166 {
6167         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6168
6169         if (ref->ref > INT_MAX/2)
6170                 return false;
6171
6172         ref->ref++;
6173         return true;
6174 }
6175
6176 /* Pipe buffer operations for a buffer. */
6177 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6178         .can_merge              = 0,
6179         .confirm                = generic_pipe_buf_confirm,
6180         .release                = buffer_pipe_buf_release,
6181         .steal                  = generic_pipe_buf_steal,
6182         .get                    = buffer_pipe_buf_get,
6183 };
6184
6185 /*
6186  * Callback from splice_to_pipe(), if we need to release some pages
6187  * at the end of the spd in case we errored out while filling the pipe.
6188  */
6189 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6190 {
6191         struct buffer_ref *ref =
6192                 (struct buffer_ref *)spd->partial[i].private;
6193
6194         if (--ref->ref)
6195                 return;
6196
6197         ring_buffer_free_read_page(ref->buffer, ref->page);
6198         kfree(ref);
6199         spd->partial[i].private = 0;
6200 }
6201
6202 static ssize_t
6203 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6204                             struct pipe_inode_info *pipe, size_t len,
6205                             unsigned int flags)
6206 {
6207         struct ftrace_buffer_info *info = file->private_data;
6208         struct trace_iterator *iter = &info->iter;
6209         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6210         struct page *pages_def[PIPE_DEF_BUFFERS];
6211         struct splice_pipe_desc spd = {
6212                 .pages          = pages_def,
6213                 .partial        = partial_def,
6214                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6215                 .flags          = flags,
6216                 .ops            = &buffer_pipe_buf_ops,
6217                 .spd_release    = buffer_spd_release,
6218         };
6219         struct buffer_ref *ref;
6220         int entries, i;
6221         ssize_t ret = 0;
6222
6223 #ifdef CONFIG_TRACER_MAX_TRACE
6224         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6225                 return -EBUSY;
6226 #endif
6227
6228         if (*ppos & (PAGE_SIZE - 1))
6229                 return -EINVAL;
6230
6231         if (len & (PAGE_SIZE - 1)) {
6232                 if (len < PAGE_SIZE)
6233                         return -EINVAL;
6234                 len &= PAGE_MASK;
6235         }
6236
6237         if (splice_grow_spd(pipe, &spd))
6238                 return -ENOMEM;
6239
6240  again:
6241         trace_access_lock(iter->cpu_file);
6242         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6243
6244         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6245                 struct page *page;
6246                 int r;
6247
6248                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6249                 if (!ref) {
6250                         ret = -ENOMEM;
6251                         break;
6252                 }
6253
6254                 ref->ref = 1;
6255                 ref->buffer = iter->trace_buffer->buffer;
6256                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6257                 if (!ref->page) {
6258                         ret = -ENOMEM;
6259                         kfree(ref);
6260                         break;
6261                 }
6262
6263                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6264                                           len, iter->cpu_file, 1);
6265                 if (r < 0) {
6266                         ring_buffer_free_read_page(ref->buffer, ref->page);
6267                         kfree(ref);
6268                         break;
6269                 }
6270
6271                 page = virt_to_page(ref->page);
6272
6273                 spd.pages[i] = page;
6274                 spd.partial[i].len = PAGE_SIZE;
6275                 spd.partial[i].offset = 0;
6276                 spd.partial[i].private = (unsigned long)ref;
6277                 spd.nr_pages++;
6278                 *ppos += PAGE_SIZE;
6279
6280                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6281         }
6282
6283         trace_access_unlock(iter->cpu_file);
6284         spd.nr_pages = i;
6285
6286         /* did we read anything? */
6287         if (!spd.nr_pages) {
6288                 if (ret)
6289                         goto out;
6290
6291                 ret = -EAGAIN;
6292                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6293                         goto out;
6294
6295                 ret = wait_on_pipe(iter, true);
6296                 if (ret)
6297                         goto out;
6298
6299                 goto again;
6300         }
6301
6302         ret = splice_to_pipe(pipe, &spd);
6303 out:
6304         splice_shrink_spd(&spd);
6305
6306         return ret;
6307 }
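
/*
 * Userspace sketch of the splice path above (an illustration, not part
 * of the kernel).  Binary ring-buffer pages are moved from a per-cpu
 * trace_pipe_raw file through a pipe into an output file without being
 * copied through user memory.  The tracefs path, the cpu number, the
 * output file name and the 4096-byte page size are all assumptions;
 * the loop blocks whenever the buffer is empty.
 *
 *        #define _GNU_SOURCE
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int p[2];
 *                ssize_t n;
 *                int in = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                              O_RDONLY);
 *                int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *
 *                if (in < 0 || out < 0 || pipe(p))
 *                        return 1;
 *                while ((n = splice(in, NULL, p[1], NULL, 4096, 0)) > 0)
 *                        splice(p[0], NULL, out, NULL, n, 0);
 *                return 0;
 *        }
 */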
6308
6309 static const struct file_operations tracing_buffers_fops = {
6310         .open           = tracing_buffers_open,
6311         .read           = tracing_buffers_read,
6312         .poll           = tracing_buffers_poll,
6313         .release        = tracing_buffers_release,
6314         .splice_read    = tracing_buffers_splice_read,
6315         .llseek         = no_llseek,
6316 };
6317
6318 static ssize_t
6319 tracing_stats_read(struct file *filp, char __user *ubuf,
6320                    size_t count, loff_t *ppos)
6321 {
6322         struct inode *inode = file_inode(filp);
6323         struct trace_array *tr = inode->i_private;
6324         struct trace_buffer *trace_buf = &tr->trace_buffer;
6325         int cpu = tracing_get_cpu(inode);
6326         struct trace_seq *s;
6327         unsigned long cnt;
6328         unsigned long long t;
6329         unsigned long usec_rem;
6330
6331         s = kmalloc(sizeof(*s), GFP_KERNEL);
6332         if (!s)
6333                 return -ENOMEM;
6334
6335         trace_seq_init(s);
6336
6337         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6338         trace_seq_printf(s, "entries: %ld\n", cnt);
6339
6340         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6341         trace_seq_printf(s, "overrun: %ld\n", cnt);
6342
6343         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6344         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6345
6346         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6347         trace_seq_printf(s, "bytes: %ld\n", cnt);
6348
6349         if (trace_clocks[tr->clock_id].in_ns) {
6350                 /* local or global for trace_clock */
6351                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6352                 usec_rem = do_div(t, USEC_PER_SEC);
6353                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6354                                                                 t, usec_rem);
6355
6356                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6357                 usec_rem = do_div(t, USEC_PER_SEC);
6358                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6359         } else {
6360                 /* counter or tsc mode for trace_clock */
6361                 trace_seq_printf(s, "oldest event ts: %llu\n",
6362                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6363
6364                 trace_seq_printf(s, "now ts: %llu\n",
6365                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6366         }
6367
6368         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6369         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6370
6371         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6372         trace_seq_printf(s, "read events: %ld\n", cnt);
6373
6374         count = simple_read_from_buffer(ubuf, count, ppos,
6375                                         s->buffer, trace_seq_used(s));
6376
6377         kfree(s);
6378
6379         return count;
6380 }
6381
6382 static const struct file_operations tracing_stats_fops = {
6383         .open           = tracing_open_generic_tr,
6384         .read           = tracing_stats_read,
6385         .llseek         = generic_file_llseek,
6386         .release        = tracing_release_generic_tr,
6387 };
6388
6389 #ifdef CONFIG_DYNAMIC_FTRACE
6390
6391 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6392 {
6393         return 0;
6394 }
6395
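/*
 * tracing_read_dyn_info - read callback for the dyn_ftrace_total_info
 * file. Prints the counter handed in via the file's private data
 * (ftrace_update_tot_cnt when the file is created in
 * tracer_init_tracefs()), followed by any architecture specific text
 * from ftrace_arch_read_dyn_info().
 */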
6396 static ssize_t
6397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6398                   size_t cnt, loff_t *ppos)
6399 {
6400         static char ftrace_dyn_info_buffer[1024];
6401         static DEFINE_MUTEX(dyn_info_mutex);
6402         unsigned long *p = filp->private_data;
6403         char *buf = ftrace_dyn_info_buffer;
6404         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6405         int r;
6406
6407         mutex_lock(&dyn_info_mutex);
6408         r = sprintf(buf, "%ld ", *p);
6409
6410         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6411         buf[r++] = '\n';
6412
6413         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6414
6415         mutex_unlock(&dyn_info_mutex);
6416
6417         return r;
6418 }
6419
6420 static const struct file_operations tracing_dyn_info_fops = {
6421         .open           = tracing_open_generic,
6422         .read           = tracing_read_dyn_info,
6423         .llseek         = generic_file_llseek,
6424 };
6425 #endif /* CONFIG_DYNAMIC_FTRACE */
6426
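/*
 * The "snapshot" ftrace function command. With both CONFIG_TRACER_SNAPSHOT
 * and CONFIG_DYNAMIC_FTRACE enabled, a probe can be armed from
 * set_ftrace_filter so that tracing_snapshot() is called whenever a
 * matched function is hit, for example (function names are illustrative):
 *
 *   echo 'do_page_fault:snapshot' > set_ftrace_filter
 *   echo 'do_page_fault:snapshot:5' > set_ftrace_filter
 *   echo '!do_page_fault:snapshot' > set_ftrace_filter
 *
 * The optional ":<count>" limits the number of snapshots taken, and a
 * leading '!' removes the probe again, as parsed by
 * ftrace_trace_snapshot_callback() below.
 */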
6427 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6428 static void
6429 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6430 {
6431         tracing_snapshot();
6432 }
6433
6434 static void
6435 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6436 {
6437                 unsigned long *count = (unsigned long *)data;
6438
6439         if (!*count)
6440                 return;
6441
6442         if (*count != -1)
6443                 (*count)--;
6444
6445         tracing_snapshot();
6446 }
6447
6448 static int
6449 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6450                       struct ftrace_probe_ops *ops, void *data)
6451 {
6452         long count = (long)data;
6453
6454         seq_printf(m, "%ps:", (void *)ip);
6455
6456         seq_puts(m, "snapshot");
6457
6458         if (count == -1)
6459                 seq_puts(m, ":unlimited\n");
6460         else
6461                 seq_printf(m, ":count=%ld\n", count);
6462
6463         return 0;
6464 }
6465
6466 static struct ftrace_probe_ops snapshot_probe_ops = {
6467         .func                   = ftrace_snapshot,
6468         .print                  = ftrace_snapshot_print,
6469 };
6470
6471 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6472         .func                   = ftrace_count_snapshot,
6473         .print                  = ftrace_snapshot_print,
6474 };
6475
6476 static int
6477 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6478                                char *glob, char *cmd, char *param, int enable)
6479 {
6480         struct ftrace_probe_ops *ops;
6481         void *count = (void *)-1;
6482         char *number;
6483         int ret;
6484
6485         /* hash funcs only work with set_ftrace_filter */
6486         if (!enable)
6487                 return -EINVAL;
6488
6489         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6490
6491         if (glob[0] == '!') {
6492                 unregister_ftrace_function_probe_func(glob+1, ops);
6493                 return 0;
6494         }
6495
6496         if (!param)
6497                 goto out_reg;
6498
6499         number = strsep(&param, ":");
6500
6501         if (!strlen(number))
6502                 goto out_reg;
6503
6504         /*
6505          * We use the callback data field (which is a pointer)
6506          * as our counter.
6507          */
6508         ret = kstrtoul(number, 0, (unsigned long *)&count);
6509         if (ret)
6510                 return ret;
6511
6512  out_reg:
6513         ret = alloc_snapshot(&global_trace);
6514         if (ret < 0)
6515                 goto out;
6516
6517         ret = register_ftrace_function_probe(glob, ops, count);
6518
6519  out:
6520         return ret < 0 ? ret : 0;
6521 }
6522
6523 static struct ftrace_func_command ftrace_snapshot_cmd = {
6524         .name                   = "snapshot",
6525         .func                   = ftrace_trace_snapshot_callback,
6526 };
6527
6528 static __init int register_snapshot_cmd(void)
6529 {
6530         return register_ftrace_command(&ftrace_snapshot_cmd);
6531 }
6532 #else
6533 static inline __init int register_snapshot_cmd(void) { return 0; }
6534 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6535
6536 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6537 {
6538         if (WARN_ON(!tr->dir))
6539                 return ERR_PTR(-ENODEV);
6540
6541         /* Top directory uses NULL as the parent */
6542         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6543                 return NULL;
6544
6545         /* All sub buffers have a descriptor */
6546         return tr->dir;
6547 }
6548
6549 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6550 {
6551         struct dentry *d_tracer;
6552
6553         if (tr->percpu_dir)
6554                 return tr->percpu_dir;
6555
6556         d_tracer = tracing_get_dentry(tr);
6557         if (IS_ERR(d_tracer))
6558                 return NULL;
6559
6560         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6561
6562         WARN_ONCE(!tr->percpu_dir,
6563                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6564
6565         return tr->percpu_dir;
6566 }
6567
6568 static struct dentry *
6569 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6570                       void *data, long cpu, const struct file_operations *fops)
6571 {
6572         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6573
6574         if (ret) /* See tracing_get_cpu() */
6575                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6576         return ret;
6577 }
6578
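/*
 * tracing_init_tracefs_percpu - create the per_cpu/cpu<N> directory for
 * one CPU, containing its trace, trace_pipe, trace_pipe_raw, stats and
 * buffer_size_kb files (plus snapshot files when CONFIG_TRACER_SNAPSHOT
 * is enabled).
 */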
6579 static void
6580 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6581 {
6582         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6583         struct dentry *d_cpu;
6584         char cpu_dir[30]; /* 30 characters should be more than enough */
6585
6586         if (!d_percpu)
6587                 return;
6588
6589         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6590         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6591         if (!d_cpu) {
6592                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6593                 return;
6594         }
6595
6596         /* per cpu trace_pipe */
6597         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6598                                 tr, cpu, &tracing_pipe_fops);
6599
6600         /* per cpu trace */
6601         trace_create_cpu_file("trace", 0644, d_cpu,
6602                                 tr, cpu, &tracing_fops);
6603
6604         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6605                                 tr, cpu, &tracing_buffers_fops);
6606
6607         trace_create_cpu_file("stats", 0444, d_cpu,
6608                                 tr, cpu, &tracing_stats_fops);
6609
6610         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6611                                 tr, cpu, &tracing_entries_fops);
6612
6613 #ifdef CONFIG_TRACER_SNAPSHOT
6614         trace_create_cpu_file("snapshot", 0644, d_cpu,
6615                                 tr, cpu, &snapshot_fops);
6616
6617         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6618                                 tr, cpu, &snapshot_raw_fops);
6619 #endif
6620 }
6621
6622 #ifdef CONFIG_FTRACE_SELFTEST
6623 /* Let selftest have access to static functions in this file */
6624 #include "trace_selftest.c"
6625 #endif
6626
6627 static ssize_t
6628 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6629                         loff_t *ppos)
6630 {
6631         struct trace_option_dentry *topt = filp->private_data;
6632         char *buf;
6633
6634         if (topt->flags->val & topt->opt->bit)
6635                 buf = "1\n";
6636         else
6637                 buf = "0\n";
6638
6639         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6640 }
6641
6642 static ssize_t
6643 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6644                          loff_t *ppos)
6645 {
6646         struct trace_option_dentry *topt = filp->private_data;
6647         unsigned long val;
6648         int ret;
6649
6650         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6651         if (ret)
6652                 return ret;
6653
6654         if (val != 0 && val != 1)
6655                 return -EINVAL;
6656
6657         if (!!(topt->flags->val & topt->opt->bit) != val) {
6658                 mutex_lock(&trace_types_lock);
6659                 ret = __set_tracer_option(topt->tr, topt->flags,
6660                                           topt->opt, !val);
6661                 mutex_unlock(&trace_types_lock);
6662                 if (ret)
6663                         return ret;
6664         }
6665
6666         *ppos += cnt;
6667
6668         return cnt;
6669 }
6670
6671
6672 static const struct file_operations trace_options_fops = {
6673         .open = tracing_open_generic,
6674         .read = trace_options_read,
6675         .write = trace_options_write,
6676         .llseek = generic_file_llseek,
6677 };
6678
6679 /*
6680  * In order to pass in both the trace_array descriptor as well as the index
6681  * to the flag that the trace option file represents, the trace_array
6682  * has a character array of trace_flags_index[], which holds the index
6683  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6684  * The address of this character array is passed to the flag option file
6685  * read/write callbacks.
6686  *
6687  * In order to extract both the index and the trace_array descriptor,
6688  * get_tr_index() uses the following algorithm.
6689  *
6690  *   idx = *ptr;
6691  *
6692  * As the pointer points at the index entry itself, dereferencing it
6693  * yields the flag's bit number (remember index[1] == 1).
6694  *
6695  * Then, to get the trace_array descriptor, we subtract that index
6696  * from the pointer, which takes us back to the start of the index array:
6697  *
6698  *   ptr - idx == &index[0]
6699  *
6700  * Then a simple container_of() from that pointer gets us to the
6701  * trace_array descriptor.
6702  */
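/*
 * For example (with the values set up by init_trace_flags_index()): if
 * data points at &tr->trace_flags_index[3], then *data == 3 and
 * data - 3 == &tr->trace_flags_index[0], from which container_of()
 * recovers the enclosing trace_array.
 */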
6703 static void get_tr_index(void *data, struct trace_array **ptr,
6704                          unsigned int *pindex)
6705 {
6706         *pindex = *(unsigned char *)data;
6707
6708         *ptr = container_of(data - *pindex, struct trace_array,
6709                             trace_flags_index);
6710 }
6711
6712 static ssize_t
6713 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6714                         loff_t *ppos)
6715 {
6716         void *tr_index = filp->private_data;
6717         struct trace_array *tr;
6718         unsigned int index;
6719         char *buf;
6720
6721         get_tr_index(tr_index, &tr, &index);
6722
6723         if (tr->trace_flags & (1 << index))
6724                 buf = "1\n";
6725         else
6726                 buf = "0\n";
6727
6728         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6729 }
6730
6731 static ssize_t
6732 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6733                          loff_t *ppos)
6734 {
6735         void *tr_index = filp->private_data;
6736         struct trace_array *tr;
6737         unsigned int index;
6738         unsigned long val;
6739         int ret;
6740
6741         get_tr_index(tr_index, &tr, &index);
6742
6743         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6744         if (ret)
6745                 return ret;
6746
6747         if (val != 0 && val != 1)
6748                 return -EINVAL;
6749
6750         mutex_lock(&trace_types_lock);
6751         ret = set_tracer_flag(tr, 1 << index, val);
6752         mutex_unlock(&trace_types_lock);
6753
6754         if (ret < 0)
6755                 return ret;
6756
6757         *ppos += cnt;
6758
6759         return cnt;
6760 }
6761
6762 static const struct file_operations trace_options_core_fops = {
6763         .open = tracing_open_generic,
6764         .read = trace_options_core_read,
6765         .write = trace_options_core_write,
6766         .llseek = generic_file_llseek,
6767 };
6768
6769 struct dentry *trace_create_file(const char *name,
6770                                  umode_t mode,
6771                                  struct dentry *parent,
6772                                  void *data,
6773                                  const struct file_operations *fops)
6774 {
6775         struct dentry *ret;
6776
6777         ret = tracefs_create_file(name, mode, parent, data, fops);
6778         if (!ret)
6779                 pr_warn("Could not create tracefs '%s' entry\n", name);
6780
6781         return ret;
6782 }
6783
6784
6785 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6786 {
6787         struct dentry *d_tracer;
6788
6789         if (tr->options)
6790                 return tr->options;
6791
6792         d_tracer = tracing_get_dentry(tr);
6793         if (IS_ERR(d_tracer))
6794                 return NULL;
6795
6796         tr->options = tracefs_create_dir("options", d_tracer);
6797         if (!tr->options) {
6798                 pr_warn("Could not create tracefs directory 'options'\n");
6799                 return NULL;
6800         }
6801
6802         return tr->options;
6803 }
6804
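/*
 * create_trace_option_file - create one options/<name> file for a
 * tracer specific flag. Reads return "0\n" or "1\n" depending on the
 * flag's current state; writing 0 or 1 updates it through
 * __set_tracer_option() (see trace_options_write() above).
 */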
6805 static void
6806 create_trace_option_file(struct trace_array *tr,
6807                          struct trace_option_dentry *topt,
6808                          struct tracer_flags *flags,
6809                          struct tracer_opt *opt)
6810 {
6811         struct dentry *t_options;
6812
6813         t_options = trace_options_init_dentry(tr);
6814         if (!t_options)
6815                 return;
6816
6817         topt->flags = flags;
6818         topt->opt = opt;
6819         topt->tr = tr;
6820
6821         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6822                                     &trace_options_fops);
6823
6824 }
6825
6826 static void
6827 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6828 {
6829         struct trace_option_dentry *topts;
6830         struct trace_options *tr_topts;
6831         struct tracer_flags *flags;
6832         struct tracer_opt *opts;
6833         int cnt;
6834         int i;
6835
6836         if (!tracer)
6837                 return;
6838
6839         flags = tracer->flags;
6840
6841         if (!flags || !flags->opts)
6842                 return;
6843
6844         /*
6845          * If this is an instance, only create flags for tracers
6846          * the instance may have.
6847          */
6848         if (!trace_ok_for_array(tracer, tr))
6849                 return;
6850
6851         for (i = 0; i < tr->nr_topts; i++) {
6852                 /* Make sure there are no duplicate flags. */
6853                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6854                         return;
6855         }
6856
6857         opts = flags->opts;
6858
6859         for (cnt = 0; opts[cnt].name; cnt++)
6860                 ;
6861
6862         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6863         if (!topts)
6864                 return;
6865
6866         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6867                             GFP_KERNEL);
6868         if (!tr_topts) {
6869                 kfree(topts);
6870                 return;
6871         }
6872
6873         tr->topts = tr_topts;
6874         tr->topts[tr->nr_topts].tracer = tracer;
6875         tr->topts[tr->nr_topts].topts = topts;
6876         tr->nr_topts++;
6877
6878         for (cnt = 0; opts[cnt].name; cnt++) {
6879                 create_trace_option_file(tr, &topts[cnt], flags,
6880                                          &opts[cnt]);
6881                 WARN_ONCE(topts[cnt].entry == NULL,
6882                           "Failed to create trace option: %s",
6883                           opts[cnt].name);
6884         }
6885 }
6886
6887 static struct dentry *
6888 create_trace_option_core_file(struct trace_array *tr,
6889                               const char *option, long index)
6890 {
6891         struct dentry *t_options;
6892
6893         t_options = trace_options_init_dentry(tr);
6894         if (!t_options)
6895                 return NULL;
6896
6897         return trace_create_file(option, 0644, t_options,
6898                                  (void *)&tr->trace_flags_index[index],
6899                                  &trace_options_core_fops);
6900 }
6901
6902 static void create_trace_options_dir(struct trace_array *tr)
6903 {
6904         struct dentry *t_options;
6905         bool top_level = tr == &global_trace;
6906         int i;
6907
6908         t_options = trace_options_init_dentry(tr);
6909         if (!t_options)
6910                 return;
6911
6912         for (i = 0; trace_options[i]; i++) {
6913                 if (top_level ||
6914                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6915                         create_trace_option_core_file(tr, trace_options[i], i);
6916         }
6917 }
6918
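/*
 * rb_simple_read()/rb_simple_write() implement the "tracing_on" file.
 * For example, "echo 0 > tracing_on" stops recording into the ring
 * buffer (and invokes the current tracer's ->stop() callback), while
 * "echo 1 > tracing_on" starts it again; reading returns the current
 * state as "0\n" or "1\n".
 */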
6919 static ssize_t
6920 rb_simple_read(struct file *filp, char __user *ubuf,
6921                size_t cnt, loff_t *ppos)
6922 {
6923         struct trace_array *tr = filp->private_data;
6924         char buf[64];
6925         int r;
6926
6927         r = tracer_tracing_is_on(tr);
6928         r = sprintf(buf, "%d\n", r);
6929
6930         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6931 }
6932
6933 static ssize_t
6934 rb_simple_write(struct file *filp, const char __user *ubuf,
6935                 size_t cnt, loff_t *ppos)
6936 {
6937         struct trace_array *tr = filp->private_data;
6938         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6939         unsigned long val;
6940         int ret;
6941
6942         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6943         if (ret)
6944                 return ret;
6945
6946         if (buffer) {
6947                 mutex_lock(&trace_types_lock);
6948                 if (!!val == tracer_tracing_is_on(tr)) {
6949                         val = 0; /* do nothing */
6950                 } else if (val) {
6951                         tracer_tracing_on(tr);
6952                         if (tr->current_trace->start)
6953                                 tr->current_trace->start(tr);
6954                 } else {
6955                         tracer_tracing_off(tr);
6956                         if (tr->current_trace->stop)
6957                                 tr->current_trace->stop(tr);
6958                 }
6959                 mutex_unlock(&trace_types_lock);
6960         }
6961
6962         (*ppos)++;
6963
6964         return cnt;
6965 }
6966
6967 static const struct file_operations rb_simple_fops = {
6968         .open           = tracing_open_generic_tr,
6969         .read           = rb_simple_read,
6970         .write          = rb_simple_write,
6971         .release        = tracing_release_generic_tr,
6972         .llseek         = default_llseek,
6973 };
6974
6975 struct dentry *trace_instance_dir;
6976
6977 static void
6978 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6979
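/*
 * allocate_trace_buffer - set up one trace_buffer: allocate its ring
 * buffer of @size bytes per CPU (honoring the overwrite flag) and its
 * per-CPU trace_array_cpu data. Note that allocate_trace_buffers()
 * below passes a size of 1 for the max/snapshot buffer when no
 * snapshot was requested on the command line.
 */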
6980 static int
6981 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6982 {
6983         enum ring_buffer_flags rb_flags;
6984
6985         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6986
6987         buf->tr = tr;
6988
6989         buf->buffer = ring_buffer_alloc(size, rb_flags);
6990         if (!buf->buffer)
6991                 return -ENOMEM;
6992
6993         buf->data = alloc_percpu(struct trace_array_cpu);
6994         if (!buf->data) {
6995                 ring_buffer_free(buf->buffer);
6996                 buf->buffer = NULL;
6997                 return -ENOMEM;
6998         }
6999
7000         /* Allocate the first page for all buffers */
7001         set_buffer_entries(&tr->trace_buffer,
7002                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7003
7004         return 0;
7005 }
7006
7007 static int allocate_trace_buffers(struct trace_array *tr, int size)
7008 {
7009         int ret;
7010
7011         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7012         if (ret)
7013                 return ret;
7014
7015 #ifdef CONFIG_TRACER_MAX_TRACE
7016         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7017                                     allocate_snapshot ? size : 1);
7018         if (WARN_ON(ret)) {
7019                 ring_buffer_free(tr->trace_buffer.buffer);
7020                 tr->trace_buffer.buffer = NULL;
7021                 free_percpu(tr->trace_buffer.data);
7022                 tr->trace_buffer.data = NULL;
7023                 return -ENOMEM;
7024         }
7025         tr->allocated_snapshot = allocate_snapshot;
7026
7027         /*
7028          * Only the top level trace array gets its snapshot allocated
7029          * from the kernel command line.
7030          */
7031         allocate_snapshot = false;
7032 #endif
7033
7034         /*
7035          * Because of the way alloc_percpu() works on x86_64, newly
7036          * allocated per-cpu memory may only be mapped into the reference
7037          * page tables.  Synchronize the vmalloc mappings into every pgd
7038          * now, otherwise a trace event taken in the x86_64 page fault
7039          * handler could fault again while touching this alloc_percpu()'d
7040          * memory.  All other alloc_percpu() and vmalloc() calls in tracing
7041          * should be audited for the same reason, since they may be touched
7042          * from within a page fault trace event!
7043          */
7044         vmalloc_sync_mappings();
7045
7046         return 0;
7047 }
7048
7049 static void free_trace_buffer(struct trace_buffer *buf)
7050 {
7051         if (buf->buffer) {
7052                 ring_buffer_free(buf->buffer);
7053                 buf->buffer = NULL;
7054                 free_percpu(buf->data);
7055                 buf->data = NULL;
7056         }
7057 }
7058
7059 static void free_trace_buffers(struct trace_array *tr)
7060 {
7061         if (!tr)
7062                 return;
7063
7064         free_trace_buffer(&tr->trace_buffer);
7065
7066 #ifdef CONFIG_TRACER_MAX_TRACE
7067         free_trace_buffer(&tr->max_buffer);
7068 #endif
7069 }
7070
7071 static void init_trace_flags_index(struct trace_array *tr)
7072 {
7073         int i;
7074
7075         /* Used by the trace options files */
7076         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7077                 tr->trace_flags_index[i] = i;
7078 }
7079
7080 static void __update_tracer_options(struct trace_array *tr)
7081 {
7082         struct tracer *t;
7083
7084         for (t = trace_types; t; t = t->next)
7085                 add_tracer_options(tr, t);
7086 }
7087
7088 static void update_tracer_options(struct trace_array *tr)
7089 {
7090         mutex_lock(&trace_types_lock);
7091         tracer_options_updated = true;
7092         __update_tracer_options(tr);
7093         mutex_unlock(&trace_types_lock);
7094 }
7095
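/*
 * instance_mkdir - create a new tracing instance.
 *
 * Called when a directory is created under instances/ in tracefs,
 * e.g. "mkdir instances/foo" from the tracing directory (typically
 * mounted at /sys/kernel/tracing). The new trace_array gets its own
 * ring buffers, event directory, option files and per_cpu tree, and is
 * added to ftrace_trace_arrays.
 */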
7096 static int instance_mkdir(const char *name)
7097 {
7098         struct trace_array *tr;
7099         int ret;
7100
7101         mutex_lock(&trace_types_lock);
7102
7103         ret = -EEXIST;
7104         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7105                 if (tr->name && strcmp(tr->name, name) == 0)
7106                         goto out_unlock;
7107         }
7108
7109         ret = -ENOMEM;
7110         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7111         if (!tr)
7112                 goto out_unlock;
7113
7114         tr->name = kstrdup(name, GFP_KERNEL);
7115         if (!tr->name)
7116                 goto out_free_tr;
7117
7118         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7119                 goto out_free_tr;
7120
7121         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7122
7123         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7124
7125         raw_spin_lock_init(&tr->start_lock);
7126
7127         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7128
7129         tr->current_trace = &nop_trace;
7130
7131         INIT_LIST_HEAD(&tr->systems);
7132         INIT_LIST_HEAD(&tr->events);
7133
7134         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7135                 goto out_free_tr;
7136
7137         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7138         if (!tr->dir)
7139                 goto out_free_tr;
7140
7141         ret = event_trace_add_tracer(tr->dir, tr);
7142         if (ret) {
7143                 tracefs_remove_recursive(tr->dir);
7144                 goto out_free_tr;
7145         }
7146
7147         init_tracer_tracefs(tr, tr->dir);
7148         init_trace_flags_index(tr);
7149         __update_tracer_options(tr);
7150
7151         list_add(&tr->list, &ftrace_trace_arrays);
7152
7153         mutex_unlock(&trace_types_lock);
7154
7155         return 0;
7156
7157  out_free_tr:
7158         free_trace_buffers(tr);
7159         free_cpumask_var(tr->tracing_cpumask);
7160         kfree(tr->name);
7161         kfree(tr);
7162
7163  out_unlock:
7164         mutex_unlock(&trace_types_lock);
7165
7166         return ret;
7167
7168 }
7169
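/*
 * instance_rmdir - remove a tracing instance created by instance_mkdir().
 *
 * Fails with -EBUSY while the instance or its current tracer still has
 * references (e.g. an open trace_pipe); otherwise it tears down the
 * events, function files, tracefs directory and ring buffers.
 */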
7170 static int instance_rmdir(const char *name)
7171 {
7172         struct trace_array *tr;
7173         int found = 0;
7174         int ret;
7175         int i;
7176
7177         mutex_lock(&trace_types_lock);
7178
7179         ret = -ENODEV;
7180         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7181                 if (tr->name && strcmp(tr->name, name) == 0) {
7182                         found = 1;
7183                         break;
7184                 }
7185         }
7186         if (!found)
7187                 goto out_unlock;
7188
7189         ret = -EBUSY;
7190         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7191                 goto out_unlock;
7192
7193         list_del(&tr->list);
7194
7195         /* Disable all the flags that were enabled coming in */
7196         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7197                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7198                         set_tracer_flag(tr, 1 << i, 0);
7199         }
7200
7201         tracing_set_nop(tr);
7202         event_trace_del_tracer(tr);
7203         ftrace_clear_pids(tr);
7204         ftrace_destroy_function_files(tr);
7205         tracefs_remove_recursive(tr->dir);
7206         free_trace_buffers(tr);
7207
7208         for (i = 0; i < tr->nr_topts; i++) {
7209                 kfree(tr->topts[i].topts);
7210         }
7211         kfree(tr->topts);
7212
7213         free_cpumask_var(tr->tracing_cpumask);
7214         kfree(tr->name);
7215         kfree(tr);
7216
7217         ret = 0;
7218
7219  out_unlock:
7220         mutex_unlock(&trace_types_lock);
7221
7222         return ret;
7223 }
7224
7225 static __init void create_trace_instances(struct dentry *d_tracer)
7226 {
7227         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7228                                                          instance_mkdir,
7229                                                          instance_rmdir);
7230         if (WARN_ON(!trace_instance_dir))
7231                 return;
7232 }
7233
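/*
 * init_tracer_tracefs - populate a trace_array's tracefs directory with
 * the standard control files (available_tracers, current_tracer, trace,
 * trace_pipe, buffer_size_kb, trace_marker, trace_clock, tracing_on,
 * ...), the options/ directory and the per_cpu/ tree. Used for both the
 * top level tracing directory and for instances.
 */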
7234 static void
7235 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7236 {
7237         int cpu;
7238
7239         trace_create_file("available_tracers", 0444, d_tracer,
7240                         tr, &show_traces_fops);
7241
7242         trace_create_file("current_tracer", 0644, d_tracer,
7243                         tr, &set_tracer_fops);
7244
7245         trace_create_file("tracing_cpumask", 0644, d_tracer,
7246                           tr, &tracing_cpumask_fops);
7247
7248         trace_create_file("trace_options", 0644, d_tracer,
7249                           tr, &tracing_iter_fops);
7250
7251         trace_create_file("trace", 0644, d_tracer,
7252                           tr, &tracing_fops);
7253
7254         trace_create_file("trace_pipe", 0444, d_tracer,
7255                           tr, &tracing_pipe_fops);
7256
7257         trace_create_file("buffer_size_kb", 0644, d_tracer,
7258                           tr, &tracing_entries_fops);
7259
7260         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7261                           tr, &tracing_total_entries_fops);
7262
7263         trace_create_file("free_buffer", 0200, d_tracer,
7264                           tr, &tracing_free_buffer_fops);
7265
7266         trace_create_file("trace_marker", 0220, d_tracer,
7267                           tr, &tracing_mark_fops);
7268
7269         trace_create_file("trace_clock", 0644, d_tracer, tr,
7270                           &trace_clock_fops);
7271
7272         trace_create_file("tracing_on", 0644, d_tracer,
7273                           tr, &rb_simple_fops);
7274
7275         create_trace_options_dir(tr);
7276
7277 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7278         trace_create_file("tracing_max_latency", 0644, d_tracer,
7279                         &tr->max_latency, &tracing_max_lat_fops);
7280 #endif
7281
7282         if (ftrace_create_function_files(tr, d_tracer))
7283                 WARN(1, "Could not allocate function filter files");
7284
7285 #ifdef CONFIG_TRACER_SNAPSHOT
7286         trace_create_file("snapshot", 0644, d_tracer,
7287                           tr, &snapshot_fops);
7288 #endif
7289
7290         for_each_tracing_cpu(cpu)
7291                 tracing_init_tracefs_percpu(tr, cpu);
7292
7293         ftrace_init_tracefs(tr, d_tracer);
7294 }
7295
7296 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7297 {
7298         struct vfsmount *mnt;
7299         struct file_system_type *type;
7300
7301         /*
7302          * To maintain backward compatibility for tools that mount
7303          * debugfs to get to the tracing facility, tracefs is automatically
7304          * mounted to the debugfs/tracing directory.
7305          */
7306         type = get_fs_type("tracefs");
7307         if (!type)
7308                 return NULL;
7309         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7310         put_filesystem(type);
7311         if (IS_ERR(mnt))
7312                 return NULL;
7313         mntget(mnt);
7314
7315         return mnt;
7316 }
7317
7318 /**
7319  * tracing_init_dentry - initialize top level trace array
7320  *
7321  * This is called when creating files or directories in the tracing
7322  * directory. It is called via fs_initcall() by any of the boot up code
7323  * and expects to return the dentry of the top level tracing directory.
7324  */
7325 struct dentry *tracing_init_dentry(void)
7326 {
7327         struct trace_array *tr = &global_trace;
7328
7329         /* The top level trace array uses NULL as the parent */
7330         if (tr->dir)
7331                 return NULL;
7332
7333         if (WARN_ON(!tracefs_initialized()) ||
7334                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7335                  WARN_ON(!debugfs_initialized())))
7336                 return ERR_PTR(-ENODEV);
7337
7338         /*
7339          * As there may still be users that expect the tracing
7340          * files to exist in debugfs/tracing, we must automount
7341          * the tracefs file system there, so older tools still
7342          * work with the newer kernel.
7343          */
7344         tr->dir = debugfs_create_automount("tracing", NULL,
7345                                            trace_automount, NULL);
7346         if (!tr->dir) {
7347                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7348                 return ERR_PTR(-ENOMEM);
7349         }
7350
7351         return NULL;
7352 }
7353
7354 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7355 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7356
7357 static void __init trace_enum_init(void)
7358 {
7359         int len;
7360
7361         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7362         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7363 }
7364
7365 #ifdef CONFIG_MODULES
7366 static void trace_module_add_enums(struct module *mod)
7367 {
7368         if (!mod->num_trace_enums)
7369                 return;
7370
7371         /*
7372          * Modules with bad taint do not have events created, do
7373          * not bother with enums either.
7374          */
7375         if (trace_module_has_bad_taint(mod))
7376                 return;
7377
7378         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7379 }
7380
7381 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7382 static void trace_module_remove_enums(struct module *mod)
7383 {
7384         union trace_enum_map_item *map;
7385         union trace_enum_map_item **last = &trace_enum_maps;
7386
7387         if (!mod->num_trace_enums)
7388                 return;
7389
7390         mutex_lock(&trace_enum_mutex);
7391
7392         map = trace_enum_maps;
7393
7394         while (map) {
7395                 if (map->head.mod == mod)
7396                         break;
7397                 map = trace_enum_jmp_to_tail(map);
7398                 last = &map->tail.next;
7399                 map = map->tail.next;
7400         }
7401         if (!map)
7402                 goto out;
7403
7404         *last = trace_enum_jmp_to_tail(map)->tail.next;
7405         kfree(map);
7406  out:
7407         mutex_unlock(&trace_enum_mutex);
7408 }
7409 #else
7410 static inline void trace_module_remove_enums(struct module *mod) { }
7411 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7412
7413 static int trace_module_notify(struct notifier_block *self,
7414                                unsigned long val, void *data)
7415 {
7416         struct module *mod = data;
7417
7418         switch (val) {
7419         case MODULE_STATE_COMING:
7420                 trace_module_add_enums(mod);
7421                 break;
7422         case MODULE_STATE_GOING:
7423                 trace_module_remove_enums(mod);
7424                 break;
7425         }
7426
7427         return 0;
7428 }
7429
7430 static struct notifier_block trace_module_nb = {
7431         .notifier_call = trace_module_notify,
7432         .priority = 0,
7433 };
7434 #endif /* CONFIG_MODULES */
7435
7436 static __init int tracer_init_tracefs(void)
7437 {
7438         struct dentry *d_tracer;
7439
7440         trace_access_lock_init();
7441
7442         d_tracer = tracing_init_dentry();
7443         if (IS_ERR(d_tracer))
7444                 return 0;
7445
7446         init_tracer_tracefs(&global_trace, d_tracer);
7447         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7448
7449         trace_create_file("tracing_thresh", 0644, d_tracer,
7450                         &global_trace, &tracing_thresh_fops);
7451
7452         trace_create_file("README", 0444, d_tracer,
7453                         NULL, &tracing_readme_fops);
7454
7455         trace_create_file("saved_cmdlines", 0444, d_tracer,
7456                         NULL, &tracing_saved_cmdlines_fops);
7457
7458         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7459                           NULL, &tracing_saved_cmdlines_size_fops);
7460
7461         trace_enum_init();
7462
7463         trace_create_enum_file(d_tracer);
7464
7465 #ifdef CONFIG_MODULES
7466         register_module_notifier(&trace_module_nb);
7467 #endif
7468
7469 #ifdef CONFIG_DYNAMIC_FTRACE
7470         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7471                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7472 #endif
7473
7474         create_trace_instances(d_tracer);
7475
7476         update_tracer_options(&global_trace);
7477
7478         return 0;
7479 }
7480
7481 static int trace_panic_handler(struct notifier_block *this,
7482                                unsigned long event, void *unused)
7483 {
7484         if (ftrace_dump_on_oops)
7485                 ftrace_dump(ftrace_dump_on_oops);
7486         return NOTIFY_OK;
7487 }
7488
7489 static struct notifier_block trace_panic_notifier = {
7490         .notifier_call  = trace_panic_handler,
7491         .next           = NULL,
7492         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7493 };
7494
7495 static int trace_die_handler(struct notifier_block *self,
7496                              unsigned long val,
7497                              void *data)
7498 {
7499         switch (val) {
7500         case DIE_OOPS:
7501                 if (ftrace_dump_on_oops)
7502                         ftrace_dump(ftrace_dump_on_oops);
7503                 break;
7504         default:
7505                 break;
7506         }
7507         return NOTIFY_OK;
7508 }
7509
7510 static struct notifier_block trace_die_notifier = {
7511         .notifier_call = trace_die_handler,
7512         .priority = 200
7513 };
7514
7515 /*
7516  * printk is capped at a max of 1024 characters; we really don't need it that big.
7517  * Nothing should be printing 1000 characters anyway.
7518  */
7519 #define TRACE_MAX_PRINT         1000
7520
7521 /*
7522  * Define here KERN_TRACE so that we have one place to modify
7523  * it if we decide to change what log level the ftrace dump
7524  * should be at.
7525  */
7526 #define KERN_TRACE              KERN_EMERG
7527
7528 void
7529 trace_printk_seq(struct trace_seq *s)
7530 {
7531         /* Probably should print a warning here. */
7532         if (s->seq.len >= TRACE_MAX_PRINT)
7533                 s->seq.len = TRACE_MAX_PRINT;
7534
7535         /*
7536          * More paranoid code. Although the buffer size is set to
7537          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7538          * an extra layer of protection.
7539          */
7540         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7541                 s->seq.len = s->seq.size - 1;
7542
7543         /* should already be NUL terminated, but we are paranoid. */
7544         s->buffer[s->seq.len] = 0;
7545
7546         printk(KERN_TRACE "%s", s->buffer);
7547
7548         trace_seq_init(s);
7549 }
7550
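/*
 * trace_init_global_iter - initialize a trace_iterator over the global
 * trace buffer, covering all CPUs. Used by ftrace_dump() below to walk
 * the buffer directly, without going through the tracefs files.
 */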
7551 void trace_init_global_iter(struct trace_iterator *iter)
7552 {
7553         iter->tr = &global_trace;
7554         iter->trace = iter->tr->current_trace;
7555         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7556         iter->trace_buffer = &global_trace.trace_buffer;
7557
7558         if (iter->trace && iter->trace->open)
7559                 iter->trace->open(iter);
7560
7561         /* Annotate start of buffers if we had overruns */
7562         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7563                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7564
7565         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7566         if (trace_clocks[iter->tr->clock_id].in_ns)
7567                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7568 }
7569
7570 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7571 {
7572         /* use static because iter can be a bit big for the stack */
7573         static struct trace_iterator iter;
7574         static atomic_t dump_running;
7575         struct trace_array *tr = &global_trace;
7576         unsigned int old_userobj;
7577         unsigned long flags;
7578         int cnt = 0, cpu;
7579
7580         /* Only allow one dump user at a time. */
7581         if (atomic_inc_return(&dump_running) != 1) {
7582                 atomic_dec(&dump_running);
7583                 return;
7584         }
7585
7586         /*
7587          * Always turn off tracing when we dump.
7588          * We don't need to show trace output of what happens
7589          * between multiple crashes.
7590          *
7591          * If the user does a sysrq-z, then they can re-enable
7592          * tracing with echo 1 > tracing_on.
7593          */
7594         tracing_off();
7595
7596         local_irq_save(flags);
7597
7598         /* Simulate the iterator */
7599         trace_init_global_iter(&iter);
7600
7601         for_each_tracing_cpu(cpu) {
7602                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7603         }
7604
7605         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7606
7607         /* don't look at user memory in panic mode */
7608         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7609
7610         switch (oops_dump_mode) {
7611         case DUMP_ALL:
7612                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7613                 break;
7614         case DUMP_ORIG:
7615                 iter.cpu_file = raw_smp_processor_id();
7616                 break;
7617         case DUMP_NONE:
7618                 goto out_enable;
7619         default:
7620                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7621                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7622         }
7623
7624         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7625
7626         /* Did function tracer already get disabled? */
7627         if (ftrace_is_dead()) {
7628                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7629                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7630         }
7631
7632         /*
7633          * We need to stop all tracing on all CPUs to read
7634          * the next buffer. This is a bit expensive, but it is
7635          * not done often. We read everything we can,
7636          * and then release the locks again.
7637          */
7638
7639         while (!trace_empty(&iter)) {
7640
7641                 if (!cnt)
7642                         printk(KERN_TRACE "---------------------------------\n");
7643
7644                 cnt++;
7645
7646                 trace_iterator_reset(&iter);
7647                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7648
7649                 if (trace_find_next_entry_inc(&iter) != NULL) {
7650                         int ret;
7651
7652                         ret = print_trace_line(&iter);
7653                         if (ret != TRACE_TYPE_NO_CONSUME)
7654                                 trace_consume(&iter);
7655                 }
7656                 touch_nmi_watchdog();
7657
7658                 trace_printk_seq(&iter.seq);
7659         }
7660
7661         if (!cnt)
7662                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7663         else
7664                 printk(KERN_TRACE "---------------------------------\n");
7665
7666  out_enable:
7667         tr->trace_flags |= old_userobj;
7668
7669         for_each_tracing_cpu(cpu) {
7670                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7671         }
7672         atomic_dec(&dump_running);
7673         local_irq_restore(flags);
7674 }
7675 EXPORT_SYMBOL_GPL(ftrace_dump);
7676
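/*
 * tracer_alloc_buffers - boot time setup of the global trace array,
 * called from trace_init(): allocates the cpumasks, the saved-cmdlines
 * buffer, the temporary buffer used by event triggers and the main
 * ring buffer(s), then registers the nop tracer and the panic/die
 * notifiers and puts global_trace on the ftrace_trace_arrays list.
 */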
7677 __init static int tracer_alloc_buffers(void)
7678 {
7679         int ring_buf_size;
7680         int ret = -ENOMEM;
7681
7682         /*
7683          * Make sure we don't accidentally add more trace options
7684          * than we have bits for.
7685          */
7686         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7687
7688         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7689                 goto out;
7690
7691         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7692                 goto out_free_buffer_mask;
7693
7694         /* Only allocate trace_printk buffers if a trace_printk exists */
7695         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7696                 /* Must be called before global_trace.buffer is allocated */
7697                 trace_printk_init_buffers();
7698
7699         /* To save memory, keep the ring buffer size to its minimum */
7700         if (ring_buffer_expanded)
7701                 ring_buf_size = trace_buf_size;
7702         else
7703                 ring_buf_size = 1;
7704
7705         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7706         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7707
7708         raw_spin_lock_init(&global_trace.start_lock);
7709
7710         /* Used for event triggers */
7711         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7712         if (!temp_buffer)
7713                 goto out_free_cpumask;
7714
7715         if (trace_create_savedcmd() < 0)
7716                 goto out_free_temp_buffer;
7717
7718         /* TODO: make the number of buffers hot pluggable with CPUs */
7719         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7720                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7721                 WARN_ON(1);
7722                 goto out_free_savedcmd;
7723         }
7724
7725         if (global_trace.buffer_disabled)
7726                 tracing_off();
7727
7728         if (trace_boot_clock) {
7729                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7730                 if (ret < 0)
7731                         pr_warn("Trace clock %s not defined, going back to default\n",
7732                                 trace_boot_clock);
7733         }
7734
7735         /*
7736          * register_tracer() might reference current_trace, so it
7737          * needs to be set before we register anything. This is
7738          * just a bootstrap of current_trace anyway.
7739          */
7740         global_trace.current_trace = &nop_trace;
7741
7742         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7743
7744         ftrace_init_global_array_ops(&global_trace);
7745
7746         init_trace_flags_index(&global_trace);
7747
7748         register_tracer(&nop_trace);
7749
7750         /* All seems OK, enable tracing */
7751         tracing_disabled = 0;
7752
7753         atomic_notifier_chain_register(&panic_notifier_list,
7754                                        &trace_panic_notifier);
7755
7756         register_die_notifier(&trace_die_notifier);
7757
7758         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7759
7760         INIT_LIST_HEAD(&global_trace.systems);
7761         INIT_LIST_HEAD(&global_trace.events);
7762         list_add(&global_trace.list, &ftrace_trace_arrays);
7763
7764         apply_trace_boot_options();
7765
7766         register_snapshot_cmd();
7767
7768         return 0;
7769
7770 out_free_savedcmd:
7771         free_saved_cmdlines_buffer(savedcmd);
7772 out_free_temp_buffer:
7773         ring_buffer_free(temp_buffer);
7774 out_free_cpumask:
7775         free_cpumask_var(global_trace.tracing_cpumask);
7776 out_free_buffer_mask:
7777         free_cpumask_var(tracing_buffer_mask);
7778 out:
7779         return ret;
7780 }
7781
7782 void __init trace_init(void)
7783 {
7784         if (tracepoint_printk) {
7785                 tracepoint_print_iter =
7786                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7787                 if (WARN_ON(!tracepoint_print_iter))
7788                         tracepoint_printk = 0;
7789         }
7790         tracer_alloc_buffers();
7791         trace_event_init();
7792 }
7793
7794 __init static int clear_boot_tracer(void)
7795 {
7796         /*
7797          * The default boot-up tracer name lives in an init section and
7798          * will be freed after boot. This function runs as a late initcall;
7799          * if the requested boot tracer was never registered, clear the
7800          * pointer so that a later registration cannot access memory that
7801          * is about to be freed.
7802          */
7803         if (!default_bootup_tracer)
7804                 return 0;
7805
7806         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7807                default_bootup_tracer);
7808         default_bootup_tracer = NULL;
7809
7810         return 0;
7811 }
7812
7813 fs_initcall(tracer_init_tracefs);
7814 late_initcall_sync(clear_boot_tracer);