GNU Linux-libre 4.9.290-gnu1
[releases.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries it inserted, but concurrent insertions into the
58  * ring-buffer, such as trace_printk, could occur at the same
59  * time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 and is cleared only when the tracer
93  * initialization succeeds. Nothing else ever sets it back to
94  * zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
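/*
 * Example (per the description above): boot with "ftrace_dump_on_oops"
 * to dump every CPU's buffer on an oops, "ftrace_dump_on_oops=orig_cpu"
 * for just the offending CPU, or flip it at run time with
 * "echo 1 > /proc/sys/kernel/ftrace_dump_on_oops".
 */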
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
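/*
 * Rough layout of one saved array, per the comment above (illustration
 * only):
 *
 *   [ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *     head.length = N                 tail.next ---> next saved array
 *     head.mod    = owning module                    (or NULL at the end)
 */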
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
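/*
 * ns2usecs() rounds to the nearest microsecond: the "+ 500" below means
 * e.g. 1499 ns -> 1 us while 1500 ns -> 2 us.
 */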
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptors of those pages hold the linked list by
265  * chaining their lru items to each of the pages in the
266  * per-CPU buffer.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 of the actual pid so zero can be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid is already +1 of the actual previous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
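/*
 * A minimal sketch (hypothetical names) of how the three helpers above
 * plug into seq_file operations; the real wiring lives with the pid
 * filtering files that use them:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	(locking elided)
 *		.show	= trace_pid_show,
 *	};
 */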
476
477 /* 128 (PID_BUF_SIZE + 1, used below) should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always create a new array when the user writes new pids.
498          * The write is an all or nothing operation: if it fails at
499          * any point, the current list is left unmodified and the
500          * new array is discarded.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list) {
504                 trace_parser_put(&parser);
505                 return -ENOMEM;
506         }
507
508         pid_list->pid_max = READ_ONCE(pid_max);
509
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513
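        /* pids is a bitmap with one bit per possible pid; round the size up to whole bytes */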
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 trace_parser_put(&parser);
517                 kfree(pid_list);
518                 return -ENOMEM;
519         }
520
521         if (filtered_pids) {
522                 /* copy the current bits to the new max */
523                 for_each_set_bit(pid, filtered_pids->pids,
524                                  filtered_pids->pid_max) {
525                         set_bit(pid, pid_list->pids);
526                         nr_pids++;
527                 }
528         }
529
530         while (cnt > 0) {
531
532                 pos = 0;
533
534                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
535                 if (ret < 0 || !trace_parser_loaded(&parser))
536                         break;
537
538                 read += ret;
539                 ubuf += ret;
540                 cnt -= ret;
541
542                 parser.buffer[parser.idx] = 0;
543
544                 ret = -EINVAL;
545                 if (kstrtoul(parser.buffer, 0, &val))
546                         break;
547                 if (val >= pid_list->pid_max)
548                         break;
549
550                 pid = (pid_t)val;
551
552                 set_bit(pid, pid_list->pids);
553                 nr_pids++;
554
555                 trace_parser_clear(&parser);
556                 ret = 0;
557         }
558         trace_parser_put(&parser);
559
560         if (ret < 0) {
561                 trace_free_pid_list(pid_list);
562                 return ret;
563         }
564
565         if (!nr_pids) {
566                 /* Cleared the list of pids */
567                 trace_free_pid_list(pid_list);
568                 read = ret;
569                 pid_list = NULL;
570         }
571
572         *new_pid_list = pid_list;
573
574         return read;
575 }
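/*
 * Callers (e.g. the set_event_pid file) feed writes such as "123 456\n"
 * through this helper; each whitespace-separated token sets one bit in
 * the freshly built pid list, which then replaces the old one.
 */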
576
577 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
578 {
579         u64 ts;
580
581         /* Early boot up does not have a buffer yet */
582         if (!buf->buffer)
583                 return trace_clock_local();
584
585         ts = ring_buffer_time_stamp(buf->buffer, cpu);
586         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
587
588         return ts;
589 }
590
591 cycle_t ftrace_now(int cpu)
592 {
593         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
594 }
595
596 /**
597  * tracing_is_enabled - Show if global_trace has been disabled
598  *
599  * Shows if the global trace has been enabled or not. It uses the
600  * mirror flag "buffer_disabled" so it can be checked in fast paths
601  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
602  * need to know the accurate state, use tracing_is_on() which is a little
603  * slower, but accurate.
604  */
605 int tracing_is_enabled(void)
606 {
607         /*
608          * For quick access (irqsoff uses this in fast path), just
609          * return the mirror variable of the state of the ring buffer.
610          * It's a little racy, but we don't really care.
611          */
612         smp_rmb();
613         return !global_trace.buffer_disabled;
614 }
615
616 /*
617  * trace_buf_size is the size in bytes that is allocated
618  * for a buffer. Note, the number of bytes is always rounded
619  * to page size.
620  *
621  * This number is purposely set to a low value of 16384:
622  * if a dump on oops happens, you will appreciate not having
623  * to wait for a huge amount of output. In any case it is
624  * configurable at both boot time and run time.
625  */
626 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
627
628 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
629
630 /* trace_types holds a link list of available tracers. */
631 static struct tracer            *trace_types __read_mostly;
632
633 /*
634  * trace_types_lock is used to protect the trace_types list.
635  */
636 DEFINE_MUTEX(trace_types_lock);
637
638 /*
639  * serialize the access of the ring buffer
640  *
641  * The ring buffer serializes readers, but that is only low level
642  * protection. The validity of the events (returned by ring_buffer_peek()
643  * etc.) is not protected by the ring buffer.
644  *
645  * The content of events may become garbage if we allow other processes to
646  * consume these events concurrently:
647  *   A) the page of the consumed events may become a normal page
648  *      (not a reader page) in the ring buffer, and this page will be
649  *      rewritten by the event producer.
650  *   B) the page of the consumed events may become a page for splice_read,
651  *      and this page will be returned to the system.
652  *
653  * These primitives allow multiple processes to access different cpu ring
654  * buffers concurrently.
655  *
656  * These primitives don't distinguish read-only and read-consume access.
657  * Multiple read-only accesses are also serialized.
658  */
659
660 #ifdef CONFIG_SMP
661 static DECLARE_RWSEM(all_cpu_access_lock);
662 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
663
664 static inline void trace_access_lock(int cpu)
665 {
666         if (cpu == RING_BUFFER_ALL_CPUS) {
667                 /* gain it for accessing the whole ring buffer. */
668                 down_write(&all_cpu_access_lock);
669         } else {
670                 /* gain it for accessing a cpu ring buffer. */
671
672                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
673                 down_read(&all_cpu_access_lock);
674
675                 /* Secondly block other access to this @cpu ring buffer. */
676                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
677         }
678 }
679
680 static inline void trace_access_unlock(int cpu)
681 {
682         if (cpu == RING_BUFFER_ALL_CPUS) {
683                 up_write(&all_cpu_access_lock);
684         } else {
685                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
686                 up_read(&all_cpu_access_lock);
687         }
688 }
689
690 static inline void trace_access_lock_init(void)
691 {
692         int cpu;
693
694         for_each_possible_cpu(cpu)
695                 mutex_init(&per_cpu(cpu_access_lock, cpu));
696 }
697
698 #else
699
700 static DEFINE_MUTEX(access_lock);
701
702 static inline void trace_access_lock(int cpu)
703 {
704         (void)cpu;
705         mutex_lock(&access_lock);
706 }
707
708 static inline void trace_access_unlock(int cpu)
709 {
710         (void)cpu;
711         mutex_unlock(&access_lock);
712 }
713
714 static inline void trace_access_lock_init(void)
715 {
716 }
717
718 #endif
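/*
 * A minimal sketch of the consumer pattern the primitives above
 * serialize (illustration only):
 *
 *	trace_access_lock(cpu);
 *	... consume events from @cpu, e.g. via ring_buffer_consume() ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead excludes every per-cpu reader
 * for the duration.
 */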
719
720 #ifdef CONFIG_STACKTRACE
721 static void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                  unsigned long flags,
723                                  int skip, int pc, struct pt_regs *regs);
724 static inline void ftrace_trace_stack(struct trace_array *tr,
725                                       struct ring_buffer *buffer,
726                                       unsigned long flags,
727                                       int skip, int pc, struct pt_regs *regs);
728
729 #else
730 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
731                                         unsigned long flags,
732                                         int skip, int pc, struct pt_regs *regs)
733 {
734 }
735 static inline void ftrace_trace_stack(struct trace_array *tr,
736                                       struct ring_buffer *buffer,
737                                       unsigned long flags,
738                                       int skip, int pc, struct pt_regs *regs)
739 {
740 }
741
742 #endif
743
744 static void tracer_tracing_on(struct trace_array *tr)
745 {
746         if (tr->trace_buffer.buffer)
747                 ring_buffer_record_on(tr->trace_buffer.buffer);
748         /*
749          * This flag is looked at when buffers haven't been allocated
750          * yet, or by some tracers (like irqsoff) that just want to
751          * know if the ring buffer has been disabled and can tolerate
752          * the race where it gets disabled while we still do a record.
753          * As the check is in the fast path of the tracers, it is more
754          * important to be fast than accurate.
755          */
756         tr->buffer_disabled = 0;
757         /* Make the flag seen by readers */
758         smp_wmb();
759 }
760
761 /**
762  * tracing_on - enable tracing buffers
763  *
764  * This function enables tracing buffers that may have been
765  * disabled with tracing_off.
766  */
767 void tracing_on(void)
768 {
769         tracer_tracing_on(&global_trace);
770 }
771 EXPORT_SYMBOL_GPL(tracing_on);
772
773 /**
774  * __trace_puts - write a constant string into the trace buffer.
775  * @ip:    The address of the caller
776  * @str:   The constant string to write
777  * @size:  The size of the string.
778  */
779 int __trace_puts(unsigned long ip, const char *str, int size)
780 {
781         struct ring_buffer_event *event;
782         struct ring_buffer *buffer;
783         struct print_entry *entry;
784         unsigned long irq_flags;
785         int alloc;
786         int pc;
787
788         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
789                 return 0;
790
791         pc = preempt_count();
792
793         if (unlikely(tracing_selftest_running || tracing_disabled))
794                 return 0;
795
796         alloc = sizeof(*entry) + size + 2; /* possible \n added */
797
798         local_save_flags(irq_flags);
799         buffer = global_trace.trace_buffer.buffer;
800         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
801                                           irq_flags, pc);
802         if (!event)
803                 return 0;
804
805         entry = ring_buffer_event_data(event);
806         entry->ip = ip;
807
808         memcpy(&entry->buf, str, size);
809
810         /* Add a newline if necessary */
811         if (entry->buf[size - 1] != '\n') {
812                 entry->buf[size] = '\n';
813                 entry->buf[size + 1] = '\0';
814         } else
815                 entry->buf[size] = '\0';
816
817         __buffer_unlock_commit(buffer, event);
818         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
819
820         return size;
821 }
822 EXPORT_SYMBOL_GPL(__trace_puts);
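/*
 * Note: callers normally reach this through the trace_puts() macro, which
 * passes in _THIS_IP_ and the string length, and which picks
 * __trace_bputs() instead when the string is a build-time constant.
 */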
823
824 /**
825  * __trace_bputs - write the pointer to a constant string into trace buffer
826  * @ip:    The address of the caller
827  * @str:   The constant string to write to the buffer
828  */
829 int __trace_bputs(unsigned long ip, const char *str)
830 {
831         struct ring_buffer_event *event;
832         struct ring_buffer *buffer;
833         struct bputs_entry *entry;
834         unsigned long irq_flags;
835         int size = sizeof(struct bputs_entry);
836         int pc;
837
838         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
839                 return 0;
840
841         pc = preempt_count();
842
843         if (unlikely(tracing_selftest_running || tracing_disabled))
844                 return 0;
845
846         local_save_flags(irq_flags);
847         buffer = global_trace.trace_buffer.buffer;
848         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
849                                           irq_flags, pc);
850         if (!event)
851                 return 0;
852
853         entry = ring_buffer_event_data(event);
854         entry->ip                       = ip;
855         entry->str                      = str;
856
857         __buffer_unlock_commit(buffer, event);
858         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
859
860         return 1;
861 }
862 EXPORT_SYMBOL_GPL(__trace_bputs);
863
864 #ifdef CONFIG_TRACER_SNAPSHOT
865 /**
866  * tracing_snapshot - take a snapshot of the current buffer.
867  *
868  * This causes a swap between the snapshot buffer and the current live
869  * tracing buffer. You can use this to take snapshots of the live
870  * trace when some condition is triggered, but continue to trace.
871  *
872  * Note, make sure to allocate the snapshot first, either with
873  * tracing_snapshot_alloc() or manually with:
874  *   echo 1 > /sys/kernel/debug/tracing/snapshot
875  *
876  * If the snapshot buffer is not allocated, this stops tracing
877  * instead, basically turning the live buffer into a permanent snapshot.
878  */
879 void tracing_snapshot(void)
880 {
881         struct trace_array *tr = &global_trace;
882         struct tracer *tracer = tr->current_trace;
883         unsigned long flags;
884
885         if (in_nmi()) {
886                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
887                 internal_trace_puts("*** snapshot is being ignored        ***\n");
888                 return;
889         }
890
891         if (!tr->allocated_snapshot) {
892                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
893                 internal_trace_puts("*** stopping trace here!   ***\n");
894                 tracing_off();
895                 return;
896         }
897
898         /* Note, snapshot can not be used when the tracer uses it */
899         if (tracer->use_max_tr) {
900                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
901                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
902                 return;
903         }
904
905         local_irq_save(flags);
906         update_max_tr(tr, current, smp_processor_id());
907         local_irq_restore(flags);
908 }
909 EXPORT_SYMBOL_GPL(tracing_snapshot);
910
911 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
912                                         struct trace_buffer *size_buf, int cpu_id);
913 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
914
915 static int alloc_snapshot(struct trace_array *tr)
916 {
917         int ret;
918
919         if (!tr->allocated_snapshot) {
920
921                 /* allocate spare buffer */
922                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
923                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
924                 if (ret < 0)
925                         return ret;
926
927                 tr->allocated_snapshot = true;
928         }
929
930         return 0;
931 }
932
933 static void free_snapshot(struct trace_array *tr)
934 {
935         /*
936          * We don't free the ring buffer; instead, we resize it because
937          * the max_tr ring buffer has some state (e.g. ring->clock) and
938          * we want to preserve it.
939          */
940         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
941         set_buffer_entries(&tr->max_buffer, 1);
942         tracing_reset_online_cpus(&tr->max_buffer);
943         tr->allocated_snapshot = false;
944 }
945
946 /**
947  * tracing_alloc_snapshot - allocate snapshot buffer.
948  *
949  * This only allocates the snapshot buffer if it isn't already
950  * allocated - it doesn't also take a snapshot.
951  *
952  * This is meant to be used in cases where the snapshot buffer needs
953  * to be set up for events that can't sleep but need to be able to
954  * trigger a snapshot.
955  */
956 int tracing_alloc_snapshot(void)
957 {
958         struct trace_array *tr = &global_trace;
959         int ret;
960
961         ret = alloc_snapshot(tr);
962         WARN_ON(ret < 0);
963
964         return ret;
965 }
966 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
967
968 /**
969  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
970  *
971  * This is similar to tracing_snapshot(), but it will allocate the
972  * snapshot buffer if it isn't already allocated. Use this only
973  * where it is safe to sleep, as the allocation may sleep.
974  *
975  * This causes a swap between the snapshot buffer and the current live
976  * tracing buffer. You can use this to take snapshots of the live
977  * trace when some condition is triggered, but continue to trace.
978  */
979 void tracing_snapshot_alloc(void)
980 {
981         int ret;
982
983         ret = tracing_alloc_snapshot();
984         if (ret < 0)
985                 return;
986
987         tracing_snapshot();
988 }
989 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
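/*
 * A minimal sketch (hypothetical condition) of combining the two calls
 * above: allocate the spare buffer from a context that may sleep, then
 * snapshot from the fast path when something interesting happens:
 *
 *	tracing_alloc_snapshot();	(during setup, may sleep)
 *	...
 *	if (unlikely(hit_interesting_condition))
 *		tracing_snapshot();	(NMI context is rejected above)
 */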
990 #else
991 void tracing_snapshot(void)
992 {
993         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
994 }
995 EXPORT_SYMBOL_GPL(tracing_snapshot);
996 int tracing_alloc_snapshot(void)
997 {
998         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
999         return -ENODEV;
1000 }
1001 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1002 void tracing_snapshot_alloc(void)
1003 {
1004         /* Give warning */
1005         tracing_snapshot();
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1008 #endif /* CONFIG_TRACER_SNAPSHOT */
1009
1010 static void tracer_tracing_off(struct trace_array *tr)
1011 {
1012         if (tr->trace_buffer.buffer)
1013                 ring_buffer_record_off(tr->trace_buffer.buffer);
1014         /*
1015          * This flag is looked at when buffers haven't been allocated
1016          * yet, or by some tracers (like irqsoff) that just want to
1017          * know if the ring buffer has been disabled and can tolerate
1018          * the race where it gets disabled while we still do a record.
1019          * As the check is in the fast path of the tracers, it is more
1020          * important to be fast than accurate.
1021          */
1022         tr->buffer_disabled = 1;
1023         /* Make the flag seen by readers */
1024         smp_wmb();
1025 }
1026
1027 /**
1028  * tracing_off - turn off tracing buffers
1029  *
1030  * This function stops the tracing buffers from recording data.
1031  * It does not disable any overhead the tracers themselves may
1032  * be causing. This function simply causes all recording to
1033  * the ring buffers to fail.
1034  */
1035 void tracing_off(void)
1036 {
1037         tracer_tracing_off(&global_trace);
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_off);
1040
1041 void disable_trace_on_warning(void)
1042 {
1043         if (__disable_trace_on_warning)
1044                 tracing_off();
1045 }
1046
1047 /**
1048  * tracer_tracing_is_on - show real state of ring buffer enabled
1049  * @tr : the trace array to know if ring buffer is enabled
1050  *
1051  * Shows real state of the ring buffer if it is enabled or not.
1052  */
1053 int tracer_tracing_is_on(struct trace_array *tr)
1054 {
1055         if (tr->trace_buffer.buffer)
1056                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1057         return !tr->buffer_disabled;
1058 }
1059
1060 /**
1061  * tracing_is_on - show state of ring buffers enabled
1062  */
1063 int tracing_is_on(void)
1064 {
1065         return tracer_tracing_is_on(&global_trace);
1066 }
1067 EXPORT_SYMBOL_GPL(tracing_is_on);
1068
1069 static int __init set_buf_size(char *str)
1070 {
1071         unsigned long buf_size;
1072
1073         if (!str)
1074                 return 0;
1075         buf_size = memparse(str, &str);
1076         /* nr_entries can not be zero */
1077         if (buf_size == 0)
1078                 return 0;
1079         trace_buf_size = buf_size;
1080         return 1;
1081 }
1082 __setup("trace_buf_size=", set_buf_size);
1083
1084 static int __init set_tracing_thresh(char *str)
1085 {
1086         unsigned long threshold;
1087         int ret;
1088
1089         if (!str)
1090                 return 0;
1091         ret = kstrtoul(str, 0, &threshold);
1092         if (ret < 0)
1093                 return 0;
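        /* The boot value is given in microseconds; tracing_thresh itself is kept in nanoseconds */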
1094         tracing_thresh = threshold * 1000;
1095         return 1;
1096 }
1097 __setup("tracing_thresh=", set_tracing_thresh);
1098
1099 unsigned long nsecs_to_usecs(unsigned long nsecs)
1100 {
1101         return nsecs / 1000;
1102 }
1103
1104 /*
1105  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1106  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1107  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1108  * of strings in the order that the enums were defined.
1109  */
1110 #undef C
1111 #define C(a, b) b
1112
1113 /* These must match the bit positions in trace_iterator_flags */
1114 static const char *trace_options[] = {
1115         TRACE_FLAGS
1116         NULL
1117 };
1118
1119 static struct {
1120         u64 (*func)(void);
1121         const char *name;
1122         int in_ns;              /* is this clock in nanoseconds? */
1123 } trace_clocks[] = {
1124         { trace_clock_local,            "local",        1 },
1125         { trace_clock_global,           "global",       1 },
1126         { trace_clock_counter,          "counter",      0 },
1127         { trace_clock_jiffies,          "uptime",       0 },
1128         { trace_clock,                  "perf",         1 },
1129         { ktime_get_mono_fast_ns,       "mono",         1 },
1130         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1131         ARCH_TRACE_CLOCKS
1132 };
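/*
 * The clock in use is selected by name through the "trace_clock" file in
 * tracefs, e.g. "echo global > trace_clock".
 */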
1133
1134 /*
1135  * trace_parser_get_init - gets the buffer for trace parser
1136  */
1137 int trace_parser_get_init(struct trace_parser *parser, int size)
1138 {
1139         memset(parser, 0, sizeof(*parser));
1140
1141         parser->buffer = kmalloc(size, GFP_KERNEL);
1142         if (!parser->buffer)
1143                 return 1;
1144
1145         parser->size = size;
1146         return 0;
1147 }
1148
1149 /*
1150  * trace_parser_put - frees the buffer for trace parser
1151  */
1152 void trace_parser_put(struct trace_parser *parser)
1153 {
1154         kfree(parser->buffer);
1155 }
1156
1157 /*
1158  * trace_get_user - reads the user input string separated by space
1159  * (matched by isspace(ch))
1160  *
1161  * For each string found the 'struct trace_parser' is updated,
1162  * and the function returns.
1163  *
1164  * Returns number of bytes read.
1165  *
1166  * See kernel/trace/trace.h for 'struct trace_parser' details.
1167  */
1168 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1169         size_t cnt, loff_t *ppos)
1170 {
1171         char ch;
1172         size_t read = 0;
1173         ssize_t ret;
1174
1175         if (!*ppos)
1176                 trace_parser_clear(parser);
1177
1178         ret = get_user(ch, ubuf++);
1179         if (ret)
1180                 goto out;
1181
1182         read++;
1183         cnt--;
1184
1185         /*
1186          * If the parser is not finished with the last write, keep reading
1187          * without skipping spaces; otherwise skip leading whitespace first.
1188          */
1189         if (!parser->cont) {
1190                 /* skip white space */
1191                 while (cnt && isspace(ch)) {
1192                         ret = get_user(ch, ubuf++);
1193                         if (ret)
1194                                 goto out;
1195                         read++;
1196                         cnt--;
1197                 }
1198
1199                 /* only spaces were written */
1200                 if (isspace(ch)) {
1201                         *ppos += read;
1202                         ret = read;
1203                         goto out;
1204                 }
1205
1206                 parser->idx = 0;
1207         }
1208
1209         /* read the non-space input */
1210         while (cnt && !isspace(ch)) {
1211                 if (parser->idx < parser->size - 1)
1212                         parser->buffer[parser->idx++] = ch;
1213                 else {
1214                         ret = -EINVAL;
1215                         goto out;
1216                 }
1217                 ret = get_user(ch, ubuf++);
1218                 if (ret)
1219                         goto out;
1220                 read++;
1221                 cnt--;
1222         }
1223
1224         /* We either got finished input or we have to wait for another call. */
1225         if (isspace(ch)) {
1226                 parser->buffer[parser->idx] = 0;
1227                 parser->cont = false;
1228         } else if (parser->idx < parser->size - 1) {
1229                 parser->cont = true;
1230                 parser->buffer[parser->idx++] = ch;
1231         } else {
1232                 ret = -EINVAL;
1233                 goto out;
1234         }
1235
1236         *ppos += read;
1237         ret = read;
1238
1239 out:
1240         return ret;
1241 }
1242
1243 /* TODO add a seq_buf_to_buffer() */
1244 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1245 {
1246         int len;
1247
1248         if (trace_seq_used(s) <= s->seq.readpos)
1249                 return -EBUSY;
1250
1251         len = trace_seq_used(s) - s->seq.readpos;
1252         if (cnt > len)
1253                 cnt = len;
1254         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1255
1256         s->seq.readpos += cnt;
1257         return cnt;
1258 }
1259
1260 unsigned long __read_mostly     tracing_thresh;
1261
1262 #ifdef CONFIG_TRACER_MAX_TRACE
1263 /*
1264  * Copy the new maximum trace into the separate maximum-trace
1265  * structure. (this way the maximum trace is permanently saved,
1266  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1267  */
1268 static void
1269 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1270 {
1271         struct trace_buffer *trace_buf = &tr->trace_buffer;
1272         struct trace_buffer *max_buf = &tr->max_buffer;
1273         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1274         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1275
1276         max_buf->cpu = cpu;
1277         max_buf->time_start = data->preempt_timestamp;
1278
1279         max_data->saved_latency = tr->max_latency;
1280         max_data->critical_start = data->critical_start;
1281         max_data->critical_end = data->critical_end;
1282
1283         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1284         max_data->pid = tsk->pid;
1285         /*
1286          * If tsk == current, then use current_uid(), as that does not use
1287          * RCU. The irq tracer can be called out of RCU scope.
1288          */
1289         if (tsk == current)
1290                 max_data->uid = current_uid();
1291         else
1292                 max_data->uid = task_uid(tsk);
1293
1294         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1295         max_data->policy = tsk->policy;
1296         max_data->rt_priority = tsk->rt_priority;
1297
1298         /* record this task's comm */
1299         tracing_record_cmdline(tsk);
1300 }
1301
1302 /**
1303  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1304  * @tr: tracer
1305  * @tsk: the task with the latency
1306  * @cpu: The cpu that initiated the trace.
1307  *
1308  * Flip the buffers between the @tr and the max_tr and record information
1309  * about which task was the cause of this latency.
1310  */
1311 void
1312 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1313 {
1314         struct ring_buffer *buf;
1315
1316         if (tr->stop_count)
1317                 return;
1318
1319         WARN_ON_ONCE(!irqs_disabled());
1320
1321         if (!tr->allocated_snapshot) {
1322                 /* Only the nop tracer should hit this when disabling */
1323                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1324                 return;
1325         }
1326
1327         arch_spin_lock(&tr->max_lock);
1328
1329         /* Inherit the recordable setting from trace_buffer */
1330         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1331                 ring_buffer_record_on(tr->max_buffer.buffer);
1332         else
1333                 ring_buffer_record_off(tr->max_buffer.buffer);
1334
1335         buf = tr->trace_buffer.buffer;
1336         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1337         tr->max_buffer.buffer = buf;
1338
1339         __update_max_tr(tr, tsk, cpu);
1340         arch_spin_unlock(&tr->max_lock);
1341 }
1342
1343 /**
1344  * update_max_tr_single - only copy one trace over, and reset the rest
1345  * @tr: tracer
1346  * @tsk: task with the latency
1347  * @cpu: the cpu of the buffer to copy.
1348  *
1349  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1350  */
1351 void
1352 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         int ret;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360         if (!tr->allocated_snapshot) {
1361                 /* Only the nop tracer should hit this when disabling */
1362                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1363                 return;
1364         }
1365
1366         arch_spin_lock(&tr->max_lock);
1367
1368         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1369
1370         if (ret == -EBUSY) {
1371                 /*
1372                  * We failed to swap the buffer due to a commit taking
1373                  * place on this CPU. We fail to record, but we reset
1374                  * the max trace buffer (no one writes directly to it)
1375                  * and flag that it failed.
1376                  */
1377                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1378                         "Failed to swap buffers due to commit in progress\n");
1379         }
1380
1381         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1382
1383         __update_max_tr(tr, tsk, cpu);
1384         arch_spin_unlock(&tr->max_lock);
1385 }
1386 #endif /* CONFIG_TRACER_MAX_TRACE */
1387
1388 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1389 {
1390         /* Iterators are static, they should be filled or empty */
1391         if (trace_buffer_iter(iter, iter->cpu_file))
1392                 return 0;
1393
1394         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1395                                 full);
1396 }
1397
1398 #ifdef CONFIG_FTRACE_STARTUP_TEST
1399 static int run_tracer_selftest(struct tracer *type)
1400 {
1401         struct trace_array *tr = &global_trace;
1402         struct tracer *saved_tracer = tr->current_trace;
1403         int ret;
1404
1405         if (!type->selftest || tracing_selftest_disabled)
1406                 return 0;
1407
1408         /*
1409          * Run a selftest on this tracer.
1410          * Here we reset the trace buffer, and set the current
1411          * tracer to be this tracer. The tracer can then run some
1412          * internal tracing to verify that everything is in order.
1413          * If we fail, we do not register this tracer.
1414          */
1415         tracing_reset_online_cpus(&tr->trace_buffer);
1416
1417         tr->current_trace = type;
1418
1419 #ifdef CONFIG_TRACER_MAX_TRACE
1420         if (type->use_max_tr) {
1421                 /* If we expanded the buffers, make sure the max is expanded too */
1422                 if (ring_buffer_expanded)
1423                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1424                                            RING_BUFFER_ALL_CPUS);
1425                 tr->allocated_snapshot = true;
1426         }
1427 #endif
1428
1429         /* the test is responsible for initializing and enabling */
1430         pr_info("Testing tracer %s: ", type->name);
1431         ret = type->selftest(type, tr);
1432         /* the test is responsible for resetting too */
1433         tr->current_trace = saved_tracer;
1434         if (ret) {
1435                 printk(KERN_CONT "FAILED!\n");
1436                 /* Add the warning after printing 'FAILED' */
1437                 WARN_ON(1);
1438                 return -1;
1439         }
1440         /* Only reset on passing, to avoid touching corrupted buffers */
1441         tracing_reset_online_cpus(&tr->trace_buffer);
1442
1443 #ifdef CONFIG_TRACER_MAX_TRACE
1444         if (type->use_max_tr) {
1445                 tr->allocated_snapshot = false;
1446
1447                 /* Shrink the max buffer again */
1448                 if (ring_buffer_expanded)
1449                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1450                                            RING_BUFFER_ALL_CPUS);
1451         }
1452 #endif
1453
1454         printk(KERN_CONT "PASSED\n");
1455         return 0;
1456 }
1457 #else
1458 static inline int run_tracer_selftest(struct tracer *type)
1459 {
1460         return 0;
1461 }
1462 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1463
1464 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1465
1466 static void __init apply_trace_boot_options(void);
1467
1468 /**
1469  * register_tracer - register a tracer with the ftrace system.
1470  * @type: the plugin for the tracer
1471  *
1472  * Register a new plugin tracer.
1473  */
1474 int __init register_tracer(struct tracer *type)
1475 {
1476         struct tracer *t;
1477         int ret = 0;
1478
1479         if (!type->name) {
1480                 pr_info("Tracer must have a name\n");
1481                 return -1;
1482         }
1483
1484         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1485                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1486                 return -1;
1487         }
1488
1489         mutex_lock(&trace_types_lock);
1490
1491         tracing_selftest_running = true;
1492
1493         for (t = trace_types; t; t = t->next) {
1494                 if (strcmp(type->name, t->name) == 0) {
1495                         /* already found */
1496                         pr_info("Tracer %s already registered\n",
1497                                 type->name);
1498                         ret = -1;
1499                         goto out;
1500                 }
1501         }
1502
1503         if (!type->set_flag)
1504                 type->set_flag = &dummy_set_flag;
1505         if (!type->flags) {
1506                 /* allocate a dummy tracer_flags */
1507                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1508                 if (!type->flags) {
1509                         ret = -ENOMEM;
1510                         goto out;
1511                 }
1512                 type->flags->val = 0;
1513                 type->flags->opts = dummy_tracer_opt;
1514         } else
1515                 if (!type->flags->opts)
1516                         type->flags->opts = dummy_tracer_opt;
1517
1518         /* store the tracer for __set_tracer_option */
1519         type->flags->trace = type;
1520
1521         ret = run_tracer_selftest(type);
1522         if (ret < 0)
1523                 goto out;
1524
1525         type->next = trace_types;
1526         trace_types = type;
1527         add_tracer_options(&global_trace, type);
1528
1529  out:
1530         tracing_selftest_running = false;
1531         mutex_unlock(&trace_types_lock);
1532
1533         if (ret || !default_bootup_tracer)
1534                 goto out_unlock;
1535
1536         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1537                 goto out_unlock;
1538
1539         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1540         /* Do we want this tracer to start on bootup? */
1541         tracing_set_tracer(&global_trace, type->name);
1542         default_bootup_tracer = NULL;
1543
1544         apply_trace_boot_options();
1545
1546         /* disable other selftests, since this will break them. */
1547         tracing_selftest_disabled = true;
1548 #ifdef CONFIG_FTRACE_STARTUP_TEST
1549         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1550                type->name);
1551 #endif
1552
1553  out_unlock:
1554         return ret;
1555 }
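/*
 * A minimal sketch (hypothetical tracer) of what a plugin hands to
 * register_tracer():
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static int __init init_example_trace(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_trace);
 */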
1556
1557 void tracing_reset(struct trace_buffer *buf, int cpu)
1558 {
1559         struct ring_buffer *buffer = buf->buffer;
1560
1561         if (!buffer)
1562                 return;
1563
1564         ring_buffer_record_disable(buffer);
1565
1566         /* Make sure all commits have finished */
1567         synchronize_sched();
1568         ring_buffer_reset_cpu(buffer, cpu);
1569
1570         ring_buffer_record_enable(buffer);
1571 }
1572
1573 void tracing_reset_online_cpus(struct trace_buffer *buf)
1574 {
1575         struct ring_buffer *buffer = buf->buffer;
1576         int cpu;
1577
1578         if (!buffer)
1579                 return;
1580
1581         ring_buffer_record_disable(buffer);
1582
1583         /* Make sure all commits have finished */
1584         synchronize_sched();
1585
1586         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1587
1588         for_each_online_cpu(cpu)
1589                 ring_buffer_reset_cpu(buffer, cpu);
1590
1591         ring_buffer_record_enable(buffer);
1592 }
1593
1594 /* Must have trace_types_lock held */
1595 void tracing_reset_all_online_cpus(void)
1596 {
1597         struct trace_array *tr;
1598
1599         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1600                 tracing_reset_online_cpus(&tr->trace_buffer);
1601 #ifdef CONFIG_TRACER_MAX_TRACE
1602                 tracing_reset_online_cpus(&tr->max_buffer);
1603 #endif
1604         }
1605 }
1606
1607 #define SAVED_CMDLINES_DEFAULT 128
1608 #define NO_CMDLINE_MAP UINT_MAX
1609 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1610 struct saved_cmdlines_buffer {
1611         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1612         unsigned *map_cmdline_to_pid;
1613         unsigned cmdline_num;
1614         int cmdline_idx;
1615         char *saved_cmdlines;
1616 };
1617 static struct saved_cmdlines_buffer *savedcmd;
1618
1619 static inline char *get_saved_cmdlines(int idx)
1620 {
1621         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1622 }
1623
1624 static inline void set_cmdline(int idx, const char *cmdline)
1625 {
1626         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1627 }
1628
1629 static int allocate_cmdlines_buffer(unsigned int val,
1630                                     struct saved_cmdlines_buffer *s)
1631 {
1632         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1633                                         GFP_KERNEL);
1634         if (!s->map_cmdline_to_pid)
1635                 return -ENOMEM;
1636
1637         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1638         if (!s->saved_cmdlines) {
1639                 kfree(s->map_cmdline_to_pid);
1640                 return -ENOMEM;
1641         }
1642
1643         s->cmdline_idx = 0;
1644         s->cmdline_num = val;
1645         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1646                sizeof(s->map_pid_to_cmdline));
1647         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1648                val * sizeof(*s->map_cmdline_to_pid));
1649
1650         return 0;
1651 }
1652
1653 static int trace_create_savedcmd(void)
1654 {
1655         int ret;
1656
1657         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1658         if (!savedcmd)
1659                 return -ENOMEM;
1660
1661         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1662         if (ret < 0) {
1663                 kfree(savedcmd);
1664                 savedcmd = NULL;
1665                 return -ENOMEM;
1666         }
1667
1668         return 0;
1669 }
1670
1671 int is_tracing_stopped(void)
1672 {
1673         return global_trace.stop_count;
1674 }
1675
1676 /**
1677  * tracing_start - quick start of the tracer
1678  *
1679  * If tracing is enabled but was stopped by tracing_stop,
1680  * this will start the tracer back up.
1681  */
1682 void tracing_start(void)
1683 {
1684         struct ring_buffer *buffer;
1685         unsigned long flags;
1686
1687         if (tracing_disabled)
1688                 return;
1689
1690         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1691         if (--global_trace.stop_count) {
1692                 if (global_trace.stop_count < 0) {
1693                         /* Someone screwed up their debugging */
1694                         WARN_ON_ONCE(1);
1695                         global_trace.stop_count = 0;
1696                 }
1697                 goto out;
1698         }
1699
1700         /* Prevent the buffers from switching */
1701         arch_spin_lock(&global_trace.max_lock);
1702
1703         buffer = global_trace.trace_buffer.buffer;
1704         if (buffer)
1705                 ring_buffer_record_enable(buffer);
1706
1707 #ifdef CONFIG_TRACER_MAX_TRACE
1708         buffer = global_trace.max_buffer.buffer;
1709         if (buffer)
1710                 ring_buffer_record_enable(buffer);
1711 #endif
1712
1713         arch_spin_unlock(&global_trace.max_lock);
1714
1715  out:
1716         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1717 }
1718
1719 static void tracing_start_tr(struct trace_array *tr)
1720 {
1721         struct ring_buffer *buffer;
1722         unsigned long flags;
1723
1724         if (tracing_disabled)
1725                 return;
1726
1727         /* If global, we need to also start the max tracer */
1728         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1729                 return tracing_start();
1730
1731         raw_spin_lock_irqsave(&tr->start_lock, flags);
1732
1733         if (--tr->stop_count) {
1734                 if (tr->stop_count < 0) {
1735                         /* Someone screwed up their debugging */
1736                         WARN_ON_ONCE(1);
1737                         tr->stop_count = 0;
1738                 }
1739                 goto out;
1740         }
1741
1742         buffer = tr->trace_buffer.buffer;
1743         if (buffer)
1744                 ring_buffer_record_enable(buffer);
1745
1746  out:
1747         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1748 }
1749
1750 /**
1751  * tracing_stop - quick stop of the tracer
1752  *
1753  * Lightweight way to stop tracing. Use in conjunction with
1754  * tracing_start.
1755  */
1756 void tracing_stop(void)
1757 {
1758         struct ring_buffer *buffer;
1759         unsigned long flags;
1760
1761         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1762         if (global_trace.stop_count++)
1763                 goto out;
1764
1765         /* Prevent the buffers from switching */
1766         arch_spin_lock(&global_trace.max_lock);
1767
1768         buffer = global_trace.trace_buffer.buffer;
1769         if (buffer)
1770                 ring_buffer_record_disable(buffer);
1771
1772 #ifdef CONFIG_TRACER_MAX_TRACE
1773         buffer = global_trace.max_buffer.buffer;
1774         if (buffer)
1775                 ring_buffer_record_disable(buffer);
1776 #endif
1777
1778         arch_spin_unlock(&global_trace.max_lock);
1779
1780  out:
1781         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1782 }
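
/*
 * Illustrative pairing (excluded from the build): tracing_stop() and
 * tracing_start() nest via stop_count, so every stop must be matched by
 * a start. The function name below is hypothetical.
 */
#if 0
static void example_pause_tracing_around_inspection(void)
{
        tracing_stop();
        /* ... read or dump the trace buffers while recording is paused ... */
        tracing_start();
}
#endif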
1783
1784 static void tracing_stop_tr(struct trace_array *tr)
1785 {
1786         struct ring_buffer *buffer;
1787         unsigned long flags;
1788
1789         /* If global, we need to also stop the max tracer */
1790         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1791                 return tracing_stop();
1792
1793         raw_spin_lock_irqsave(&tr->start_lock, flags);
1794         if (tr->stop_count++)
1795                 goto out;
1796
1797         buffer = tr->trace_buffer.buffer;
1798         if (buffer)
1799                 ring_buffer_record_disable(buffer);
1800
1801  out:
1802         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1803 }
1804
1805 void trace_stop_cmdline_recording(void);
1806
1807 static int trace_save_cmdline(struct task_struct *tsk)
1808 {
1809         unsigned tpid, idx;
1810
1811         /* treat recording of idle task as a success */
1812         if (!tsk->pid)
1813                 return 1;
1814
1815         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1816
1817         /*
1818          * It's not the end of the world if we don't get
1819          * the lock, but we also don't want to spin
1820          * nor do we want to disable interrupts,
1821          * so if we miss here, then better luck next time.
1822          */
1823         if (!arch_spin_trylock(&trace_cmdline_lock))
1824                 return 0;
1825
1826         idx = savedcmd->map_pid_to_cmdline[tpid];
1827         if (idx == NO_CMDLINE_MAP) {
1828                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1829
1830                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1831                 savedcmd->cmdline_idx = idx;
1832         }
1833
1834         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1835         set_cmdline(idx, tsk->comm);
1836
1837         arch_spin_unlock(&trace_cmdline_lock);
1838
1839         return 1;
1840 }
1841
1842 static void __trace_find_cmdline(int pid, char comm[])
1843 {
1844         unsigned map;
1845         int tpid;
1846
1847         if (!pid) {
1848                 strcpy(comm, "<idle>");
1849                 return;
1850         }
1851
1852         if (WARN_ON_ONCE(pid < 0)) {
1853                 strcpy(comm, "<XXX>");
1854                 return;
1855         }
1856
1857         tpid = pid & (PID_MAX_DEFAULT - 1);
1858         map = savedcmd->map_pid_to_cmdline[tpid];
1859         if (map != NO_CMDLINE_MAP) {
1860                 tpid = savedcmd->map_cmdline_to_pid[map];
1861                 if (tpid == pid) {
1862                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1863                         return;
1864                 }
1865         }
1866         strcpy(comm, "<...>");
1867 }
1868
1869 void trace_find_cmdline(int pid, char comm[])
1870 {
1871         preempt_disable();
1872         arch_spin_lock(&trace_cmdline_lock);
1873
1874         __trace_find_cmdline(pid, comm);
1875
1876         arch_spin_unlock(&trace_cmdline_lock);
1877         preempt_enable();
1878 }
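
/*
 * Illustrative caller sketch (excluded from the build): the output
 * buffer handed to trace_find_cmdline() must hold TASK_COMM_LEN bytes.
 * The function name and the "s"/"pid" parameters are hypothetical.
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%16s-%-5d\n", comm, pid);
}
#endif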
1879
1880 void tracing_record_cmdline(struct task_struct *tsk)
1881 {
1882         if (!__this_cpu_read(trace_cmdline_save))
1883                 return;
1884
1885         if (trace_save_cmdline(tsk))
1886                 __this_cpu_write(trace_cmdline_save, false);
1887 }
1888
1889 void
1890 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1891                              int pc)
1892 {
1893         struct task_struct *tsk = current;
1894
1895         entry->preempt_count            = pc & 0xff;
1896         entry->pid                      = (tsk) ? tsk->pid : 0;
1897         entry->flags =
1898 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1899                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1900 #else
1901                 TRACE_FLAG_IRQS_NOSUPPORT |
1902 #endif
1903                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1904                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1905                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1906                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1907                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1908 }
1909 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
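
/*
 * Illustrative sketch (excluded from the build): callers normally
 * capture the irq flags and preempt count themselves and pass them
 * down, as the tracers in this file do. "entry" is a hypothetical,
 * already reserved trace entry.
 */
#if 0
static void example_fill_entry_header(struct trace_entry *entry)
{
        unsigned long irq_flags;

        local_save_flags(irq_flags);
        tracing_generic_entry_update(entry, irq_flags, preempt_count());
}
#endif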
1910
1911 static __always_inline void
1912 trace_event_setup(struct ring_buffer_event *event,
1913                   int type, unsigned long flags, int pc)
1914 {
1915         struct trace_entry *ent = ring_buffer_event_data(event);
1916
1917         tracing_generic_entry_update(ent, flags, pc);
1918         ent->type = type;
1919 }
1920
1921 struct ring_buffer_event *
1922 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1923                           int type,
1924                           unsigned long len,
1925                           unsigned long flags, int pc)
1926 {
1927         struct ring_buffer_event *event;
1928
1929         event = ring_buffer_lock_reserve(buffer, len);
1930         if (event != NULL)
1931                 trace_event_setup(event, type, flags, pc);
1932
1933         return event;
1934 }
1935
1936 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1937 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1938 static int trace_buffered_event_ref;
1939
1940 /**
1941  * trace_buffered_event_enable - enable buffering events
1942  *
1943  * When events are being filtered, it is quicker to use a temporary
1944  * buffer to write the event data into if there's a likely chance
1945  * that it will not be committed. Discarding an event from the ring
1946  * buffer is not as fast as committing it, and is much slower than
1947  * copying the data out of the temporary buffer and then committing it.
1948  *
1949  * When an event is to be filtered, allocate per cpu buffers to
1950  * write the event data into; if the event is filtered and discarded,
1951  * it is simply dropped, otherwise the entire data is committed
1952  * in one shot.
1953  */
1954 void trace_buffered_event_enable(void)
1955 {
1956         struct ring_buffer_event *event;
1957         struct page *page;
1958         int cpu;
1959
1960         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1961
1962         if (trace_buffered_event_ref++)
1963                 return;
1964
1965         for_each_tracing_cpu(cpu) {
1966                 page = alloc_pages_node(cpu_to_node(cpu),
1967                                         GFP_KERNEL | __GFP_NORETRY, 0);
1968                 if (!page)
1969                         goto failed;
1970
1971                 event = page_address(page);
1972                 memset(event, 0, sizeof(*event));
1973
1974                 per_cpu(trace_buffered_event, cpu) = event;
1975
1976                 preempt_disable();
1977                 if (cpu == smp_processor_id() &&
1978                     this_cpu_read(trace_buffered_event) !=
1979                     per_cpu(trace_buffered_event, cpu))
1980                         WARN_ON_ONCE(1);
1981                 preempt_enable();
1982         }
1983
1984         return;
1985  failed:
1986         trace_buffered_event_disable();
1987 }
1988
1989 static void enable_trace_buffered_event(void *data)
1990 {
1991         /* Probably not needed, but do it anyway */
1992         smp_rmb();
1993         this_cpu_dec(trace_buffered_event_cnt);
1994 }
1995
1996 static void disable_trace_buffered_event(void *data)
1997 {
1998         this_cpu_inc(trace_buffered_event_cnt);
1999 }
2000
2001 /**
2002  * trace_buffered_event_disable - disable buffering events
2003  *
2004  * When a filter is removed, it is faster to not use the buffered
2005  * events, and to commit directly into the ring buffer. Free up
2006  * the temp buffers when there are no more users. This requires
2007  * special synchronization with current events.
2008  */
2009 void trace_buffered_event_disable(void)
2010 {
2011         int cpu;
2012
2013         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2014
2015         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2016                 return;
2017
2018         if (--trace_buffered_event_ref)
2019                 return;
2020
2021         preempt_disable();
2022         /* For each CPU, set the buffer as used. */
2023         smp_call_function_many(tracing_buffer_mask,
2024                                disable_trace_buffered_event, NULL, 1);
2025         preempt_enable();
2026
2027         /* Wait for all current users to finish */
2028         synchronize_sched();
2029
2030         for_each_tracing_cpu(cpu) {
2031                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2032                 per_cpu(trace_buffered_event, cpu) = NULL;
2033         }
2034         /*
2035          * Make sure trace_buffered_event is NULL before clearing
2036          * trace_buffered_event_cnt.
2037          */
2038         smp_wmb();
2039
2040         preempt_disable();
2041         /* Do the work on each cpu */
2042         smp_call_function_many(tracing_buffer_mask,
2043                                enable_trace_buffered_event, NULL, 1);
2044         preempt_enable();
2045 }
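
/*
 * Illustrative pairing (excluded from the build): both helpers above
 * require event_mutex to be held, as their WARN_ON_ONCE() checks
 * document, and they are reference counted, so enable/disable calls
 * must balance. The function name below is hypothetical.
 */
#if 0
static void example_toggle_buffered_events(bool filter_added)
{
        mutex_lock(&event_mutex);
        if (filter_added)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}
#endif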
2046
2047 void
2048 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2049 {
2050         __this_cpu_write(trace_cmdline_save, true);
2051
2052         /* If this is the temp buffer, we need to commit fully */
2053         if (this_cpu_read(trace_buffered_event) == event) {
2054                 /* Length is in event->array[0] */
2055                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2056                 /* Release the temp buffer */
2057                 this_cpu_dec(trace_buffered_event_cnt);
2058         } else
2059                 ring_buffer_unlock_commit(buffer, event);
2060 }
2061
2062 static struct ring_buffer *temp_buffer;
2063
2064 struct ring_buffer_event *
2065 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2066                           struct trace_event_file *trace_file,
2067                           int type, unsigned long len,
2068                           unsigned long flags, int pc)
2069 {
2070         struct ring_buffer_event *entry;
2071         int val;
2072
2073         *current_rb = trace_file->tr->trace_buffer.buffer;
2074
2075         if ((trace_file->flags &
2076              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2077             (entry = this_cpu_read(trace_buffered_event))) {
2078                 /* Try to use the per cpu buffer first */
2079                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2080                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2081                         trace_event_setup(entry, type, flags, pc);
2082                         entry->array[0] = len;
2083                         return entry;
2084                 }
2085                 this_cpu_dec(trace_buffered_event_cnt);
2086         }
2087
2088         entry = trace_buffer_lock_reserve(*current_rb,
2089                                          type, len, flags, pc);
2090         /*
2091          * If tracing is off, but we have triggers enabled
2092          * we still need to look at the event data. Use the temp_buffer
2093          * to store the trace event for the trigger to use. It's recursion
2094          * safe and will not be recorded anywhere.
2095          */
2096         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2097                 *current_rb = temp_buffer;
2098                 entry = trace_buffer_lock_reserve(*current_rb,
2099                                                   type, len, flags, pc);
2100         }
2101         return entry;
2102 }
2103 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
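
/*
 * Illustrative reserve/commit pairing (excluded from the build). The
 * event file, type and payload here are hypothetical; the point is
 * that a reservation from trace_event_buffer_lock_reserve() is paired
 * with a commit on the ring buffer it handed back.
 */
#if 0
static void example_reserve_and_commit(struct trace_event_file *trace_file,
                                       int type, unsigned long flags, int pc)
{
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        struct trace_entry *entry;

        event = trace_event_buffer_lock_reserve(&buffer, trace_file, type,
                                                sizeof(*entry), flags, pc);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        /* ... fill in the payload past the common trace_entry header ... */
        __buffer_unlock_commit(buffer, event);
}
#endif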
2104
2105 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2106                                      struct ring_buffer *buffer,
2107                                      struct ring_buffer_event *event,
2108                                      unsigned long flags, int pc,
2109                                      struct pt_regs *regs)
2110 {
2111         __buffer_unlock_commit(buffer, event);
2112
2113         /*
2114          * If regs is not set, then skip the following callers:
2115          *   trace_buffer_unlock_commit_regs
2116          *   event_trigger_unlock_commit
2117          *   trace_event_buffer_commit
2118          *   trace_event_raw_event_sched_switch
2119          * Note, we can still get here via blktrace, wakeup tracer
2120          * and mmiotrace, but that's ok if they lose a function or
2121          * two. They are not that meaningful.
2122          */
2123         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2124         ftrace_trace_userstack(tr, buffer, flags, pc);
2125 }
2126
2127 void
2128 trace_function(struct trace_array *tr,
2129                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2130                int pc)
2131 {
2132         struct trace_event_call *call = &event_function;
2133         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2134         struct ring_buffer_event *event;
2135         struct ftrace_entry *entry;
2136
2137         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2138                                           flags, pc);
2139         if (!event)
2140                 return;
2141         entry   = ring_buffer_event_data(event);
2142         entry->ip                       = ip;
2143         entry->parent_ip                = parent_ip;
2144
2145         if (!call_filter_check_discard(call, entry, buffer, event))
2146                 __buffer_unlock_commit(buffer, event);
2147 }
2148
2149 #ifdef CONFIG_STACKTRACE
2150
2151 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2152 struct ftrace_stack {
2153         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2154 };
2155
2156 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2157 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2158
2159 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2160                                  unsigned long flags,
2161                                  int skip, int pc, struct pt_regs *regs)
2162 {
2163         struct trace_event_call *call = &event_kernel_stack;
2164         struct ring_buffer_event *event;
2165         struct stack_entry *entry;
2166         struct stack_trace trace;
2167         int use_stack;
2168         int size = FTRACE_STACK_ENTRIES;
2169
2170         trace.nr_entries        = 0;
2171         trace.skip              = skip;
2172
2173         /*
2174          * Add two, for this function and the call to save_stack_trace()
2175          * If regs is set, then these functions will not be in the way.
2176          */
2177         if (!regs)
2178                 trace.skip += 2;
2179
2180         /*
2181          * Since events can happen in NMIs there's no safe way to
2182          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2183          * or NMI comes in, it will just have to fall back to the default
2184          * FTRACE_STACK_ENTRIES sized stack stored directly in the event.
2185          */
2186         preempt_disable_notrace();
2187
2188         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2189         /*
2190          * We don't need any atomic variables, just a barrier.
2191          * If an interrupt comes in, we don't care, because it would
2192          * have exited and put the counter back to what we want.
2193          * We just need a barrier to keep gcc from moving things
2194          * around.
2195          */
2196         barrier();
2197         if (use_stack == 1) {
2198                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2199                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2200
2201                 if (regs)
2202                         save_stack_trace_regs(regs, &trace);
2203                 else
2204                         save_stack_trace(&trace);
2205
2206                 if (trace.nr_entries > size)
2207                         size = trace.nr_entries;
2208         } else
2209                 /* From now on, use_stack is a boolean */
2210                 use_stack = 0;
2211
2212         size *= sizeof(unsigned long);
2213
2214         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2215                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2216                                     flags, pc);
2217         if (!event)
2218                 goto out;
2219         entry = ring_buffer_event_data(event);
2220
2221         memset(&entry->caller, 0, size);
2222
2223         if (use_stack)
2224                 memcpy(&entry->caller, trace.entries,
2225                        trace.nr_entries * sizeof(unsigned long));
2226         else {
2227                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2228                 trace.entries           = entry->caller;
2229                 if (regs)
2230                         save_stack_trace_regs(regs, &trace);
2231                 else
2232                         save_stack_trace(&trace);
2233         }
2234
2235         entry->size = trace.nr_entries;
2236
2237         if (!call_filter_check_discard(call, entry, buffer, event))
2238                 __buffer_unlock_commit(buffer, event);
2239
2240  out:
2241         /* Again, don't let gcc optimize things here */
2242         barrier();
2243         __this_cpu_dec(ftrace_stack_reserve);
2244         preempt_enable_notrace();
2245
2246 }
2247
2248 static inline void ftrace_trace_stack(struct trace_array *tr,
2249                                       struct ring_buffer *buffer,
2250                                       unsigned long flags,
2251                                       int skip, int pc, struct pt_regs *regs)
2252 {
2253         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2254                 return;
2255
2256         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2257 }
2258
2259 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2260                    int pc)
2261 {
2262         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2263 }
2264
2265 /**
2266  * trace_dump_stack - record a stack back trace in the trace buffer
2267  * @skip: Number of functions to skip (helper handlers)
2268  */
2269 void trace_dump_stack(int skip)
2270 {
2271         unsigned long flags;
2272
2273         if (tracing_disabled || tracing_selftest_running)
2274                 return;
2275
2276         local_save_flags(flags);
2277
2278         /*
2279          * Skip 3 more; that seems to get us to the caller of
2280          * this function.
2281          */
2282         skip += 3;
2283         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2284                              flags, skip, preempt_count(), NULL);
2285 }
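
/*
 * Illustrative debugging use (excluded from the build): record the
 * current kernel stack in the trace buffer from a code path of
 * interest; a skip of 0 starts at the caller of trace_dump_stack().
 */
#if 0
static void example_mark_code_path(void)
{
        trace_dump_stack(0);
}
#endif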
2286
2287 static DEFINE_PER_CPU(int, user_stack_count);
2288
2289 void
2290 ftrace_trace_userstack(struct trace_array *tr,
2291                        struct ring_buffer *buffer, unsigned long flags, int pc)
2292 {
2293         struct trace_event_call *call = &event_user_stack;
2294         struct ring_buffer_event *event;
2295         struct userstack_entry *entry;
2296         struct stack_trace trace;
2297
2298         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2299                 return;
2300
2301         /*
2302          * NMIs cannot handle page faults, even with fixups.
2303          * Saving the user stack can (and often does) fault.
2304          */
2305         if (unlikely(in_nmi()))
2306                 return;
2307
2308         /*
2309          * prevent recursion, since the user stack tracing may
2310          * trigger other kernel events.
2311          */
2312         preempt_disable();
2313         if (__this_cpu_read(user_stack_count))
2314                 goto out;
2315
2316         __this_cpu_inc(user_stack_count);
2317
2318         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2319                                           sizeof(*entry), flags, pc);
2320         if (!event)
2321                 goto out_drop_count;
2322         entry   = ring_buffer_event_data(event);
2323
2324         entry->tgid             = current->tgid;
2325         memset(&entry->caller, 0, sizeof(entry->caller));
2326
2327         trace.nr_entries        = 0;
2328         trace.max_entries       = FTRACE_STACK_ENTRIES;
2329         trace.skip              = 0;
2330         trace.entries           = entry->caller;
2331
2332         save_stack_trace_user(&trace);
2333         if (!call_filter_check_discard(call, entry, buffer, event))
2334                 __buffer_unlock_commit(buffer, event);
2335
2336  out_drop_count:
2337         __this_cpu_dec(user_stack_count);
2338  out:
2339         preempt_enable();
2340 }
2341
2342 #ifdef UNUSED
2343 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2344 {
2345         ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
2346 }
2347 #endif /* UNUSED */
2348
2349 #endif /* CONFIG_STACKTRACE */
2350
2351 /* created for use with alloc_percpu */
2352 struct trace_buffer_struct {
2353         int nesting;
2354         char buffer[4][TRACE_BUF_SIZE];
2355 };
2356
2357 static struct trace_buffer_struct *trace_percpu_buffer;
2358
2359 /*
2360  * This allows for lockless recording.  If we're nested too deeply, then
2361  * this returns NULL.
2362  */
2363 static char *get_trace_buf(void)
2364 {
2365         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2366
2367         if (!buffer || buffer->nesting >= 4)
2368                 return NULL;
2369
2370         buffer->nesting++;
2371
2372         /* Interrupts must see nesting incremented before we use the buffer */
2373         barrier();
2374         return &buffer->buffer[buffer->nesting - 1][0];
2375 }
2376
2377 static void put_trace_buf(void)
2378 {
2379         /* Don't let the decrement of nesting leak before this */
2380         barrier();
2381         this_cpu_dec(trace_percpu_buffer->nesting);
2382 }
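
/*
 * Illustrative get/put pairing (excluded from the build): each nesting
 * context (task, softirq, hardirq, NMI) gets its own slot, up to four
 * levels, and a successful get_trace_buf() must be balanced by
 * put_trace_buf(). The function name below is hypothetical.
 */
#if 0
static void example_use_trace_buf(void)
{
        char *tbuf = get_trace_buf();

        if (!tbuf)
                return;

        /* ... format at most TRACE_BUF_SIZE bytes into tbuf ... */
        put_trace_buf();
}
#endif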
2383
2384 static int alloc_percpu_trace_buffer(void)
2385 {
2386         struct trace_buffer_struct *buffers;
2387
2388         buffers = alloc_percpu(struct trace_buffer_struct);
2389         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2390                 return -ENOMEM;
2391
2392         trace_percpu_buffer = buffers;
2393         return 0;
2394 }
2395
2396 static int buffers_allocated;
2397
2398 void trace_printk_init_buffers(void)
2399 {
2400         if (buffers_allocated)
2401                 return;
2402
2403         if (alloc_percpu_trace_buffer())
2404                 return;
2405
2406         /* trace_printk() is for debug use only. Don't use it in production. */
2407
2408         pr_warn("\n");
2409         pr_warn("**********************************************************\n");
2410         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2411         pr_warn("**                                                      **\n");
2412         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2413         pr_warn("**                                                      **\n");
2414         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2415         pr_warn("** unsafe for production use.                           **\n");
2416         pr_warn("**                                                      **\n");
2417         pr_warn("** If you see this message and you are not debugging    **\n");
2418         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2419         pr_warn("**                                                      **\n");
2420         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2421         pr_warn("**********************************************************\n");
2422
2423         /* Expand the buffers to set size */
2424         tracing_update_buffers();
2425
2426         buffers_allocated = 1;
2427
2428         /*
2429          * trace_printk_init_buffers() can be called by modules.
2430          * If that happens, then we need to start cmdline recording
2431          * directly here. If global_trace.trace_buffer.buffer is already
2432          * allocated here, then this was called by module code.
2433          */
2434         if (global_trace.trace_buffer.buffer)
2435                 tracing_start_cmdline_record();
2436 }
2437
2438 void trace_printk_start_comm(void)
2439 {
2440         /* Start tracing comms if trace printk is set */
2441         if (!buffers_allocated)
2442                 return;
2443         tracing_start_cmdline_record();
2444 }
2445
2446 static void trace_printk_start_stop_comm(int enabled)
2447 {
2448         if (!buffers_allocated)
2449                 return;
2450
2451         if (enabled)
2452                 tracing_start_cmdline_record();
2453         else
2454                 tracing_stop_cmdline_record();
2455 }
2456
2457 /**
2458  * trace_vbprintk - write binary msg to tracing buffer
2459  *
2460  */
2461 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2462 {
2463         struct trace_event_call *call = &event_bprint;
2464         struct ring_buffer_event *event;
2465         struct ring_buffer *buffer;
2466         struct trace_array *tr = &global_trace;
2467         struct bprint_entry *entry;
2468         unsigned long flags;
2469         char *tbuffer;
2470         int len = 0, size, pc;
2471
2472         if (unlikely(tracing_selftest_running || tracing_disabled))
2473                 return 0;
2474
2475         /* Don't pollute graph traces with trace_vprintk internals */
2476         pause_graph_tracing();
2477
2478         pc = preempt_count();
2479         preempt_disable_notrace();
2480
2481         tbuffer = get_trace_buf();
2482         if (!tbuffer) {
2483                 len = 0;
2484                 goto out_nobuffer;
2485         }
2486
2487         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2488
2489         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2490                 goto out;
2491
2492         local_save_flags(flags);
2493         size = sizeof(*entry) + sizeof(u32) * len;
2494         buffer = tr->trace_buffer.buffer;
2495         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2496                                           flags, pc);
2497         if (!event)
2498                 goto out;
2499         entry = ring_buffer_event_data(event);
2500         entry->ip                       = ip;
2501         entry->fmt                      = fmt;
2502
2503         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2504         if (!call_filter_check_discard(call, entry, buffer, event)) {
2505                 __buffer_unlock_commit(buffer, event);
2506                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2507         }
2508
2509 out:
2510         put_trace_buf();
2511
2512 out_nobuffer:
2513         preempt_enable_notrace();
2514         unpause_graph_tracing();
2515
2516         return len;
2517 }
2518 EXPORT_SYMBOL_GPL(trace_vbprintk);
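
/*
 * Illustrative only (excluded from the build): trace_printk() calls
 * that have format arguments reach this function through the binary
 * bprintk path, so a debug statement like the one below (with
 * hypothetical variables) ends up here.
 */
#if 0
static void example_debug_statement(int nbytes, const char *name)
{
        trace_printk("read %d bytes from %s\n", nbytes, name);
}
#endif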
2519
2520 __printf(3, 0)
2521 static int
2522 __trace_array_vprintk(struct ring_buffer *buffer,
2523                       unsigned long ip, const char *fmt, va_list args)
2524 {
2525         struct trace_event_call *call = &event_print;
2526         struct ring_buffer_event *event;
2527         int len = 0, size, pc;
2528         struct print_entry *entry;
2529         unsigned long flags;
2530         char *tbuffer;
2531
2532         if (tracing_disabled || tracing_selftest_running)
2533                 return 0;
2534
2535         /* Don't pollute graph traces with trace_vprintk internals */
2536         pause_graph_tracing();
2537
2538         pc = preempt_count();
2539         preempt_disable_notrace();
2540
2541
2542         tbuffer = get_trace_buf();
2543         if (!tbuffer) {
2544                 len = 0;
2545                 goto out_nobuffer;
2546         }
2547
2548         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2549
2550         local_save_flags(flags);
2551         size = sizeof(*entry) + len + 1;
2552         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2553                                           flags, pc);
2554         if (!event)
2555                 goto out;
2556         entry = ring_buffer_event_data(event);
2557         entry->ip = ip;
2558
2559         memcpy(&entry->buf, tbuffer, len + 1);
2560         if (!call_filter_check_discard(call, entry, buffer, event)) {
2561                 __buffer_unlock_commit(buffer, event);
2562                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2563         }
2564
2565 out:
2566         put_trace_buf();
2567
2568 out_nobuffer:
2569         preempt_enable_notrace();
2570         unpause_graph_tracing();
2571
2572         return len;
2573 }
2574
2575 __printf(3, 0)
2576 int trace_array_vprintk(struct trace_array *tr,
2577                         unsigned long ip, const char *fmt, va_list args)
2578 {
2579         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2580 }
2581
2582 __printf(3, 0)
2583 int trace_array_printk(struct trace_array *tr,
2584                        unsigned long ip, const char *fmt, ...)
2585 {
2586         int ret;
2587         va_list ap;
2588
2589         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2590                 return 0;
2591
2592         if (!tr)
2593                 return -ENOENT;
2594
2595         va_start(ap, fmt);
2596         ret = trace_array_vprintk(tr, ip, fmt, ap);
2597         va_end(ap);
2598         return ret;
2599 }
2600
2601 __printf(3, 4)
2602 int trace_array_printk_buf(struct ring_buffer *buffer,
2603                            unsigned long ip, const char *fmt, ...)
2604 {
2605         int ret;
2606         va_list ap;
2607
2608         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2609                 return 0;
2610
2611         va_start(ap, fmt);
2612         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2613         va_end(ap);
2614         return ret;
2615 }
2616
2617 __printf(2, 0)
2618 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2619 {
2620         return trace_array_vprintk(&global_trace, ip, fmt, args);
2621 }
2622 EXPORT_SYMBOL_GPL(trace_vprintk);
2623
2624 static void trace_iterator_increment(struct trace_iterator *iter)
2625 {
2626         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2627
2628         iter->idx++;
2629         if (buf_iter)
2630                 ring_buffer_read(buf_iter, NULL);
2631 }
2632
2633 static struct trace_entry *
2634 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2635                 unsigned long *lost_events)
2636 {
2637         struct ring_buffer_event *event;
2638         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2639
2640         if (buf_iter)
2641                 event = ring_buffer_iter_peek(buf_iter, ts);
2642         else
2643                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2644                                          lost_events);
2645
2646         if (event) {
2647                 iter->ent_size = ring_buffer_event_length(event);
2648                 return ring_buffer_event_data(event);
2649         }
2650         iter->ent_size = 0;
2651         return NULL;
2652 }
2653
2654 static struct trace_entry *
2655 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2656                   unsigned long *missing_events, u64 *ent_ts)
2657 {
2658         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2659         struct trace_entry *ent, *next = NULL;
2660         unsigned long lost_events = 0, next_lost = 0;
2661         int cpu_file = iter->cpu_file;
2662         u64 next_ts = 0, ts;
2663         int next_cpu = -1;
2664         int next_size = 0;
2665         int cpu;
2666
2667         /*
2668          * If we are in a per_cpu trace file, don't bother iterating over
2669          * all CPUs; just peek at that one directly.
2670          */
2671         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2672                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2673                         return NULL;
2674                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2675                 if (ent_cpu)
2676                         *ent_cpu = cpu_file;
2677
2678                 return ent;
2679         }
2680
2681         for_each_tracing_cpu(cpu) {
2682
2683                 if (ring_buffer_empty_cpu(buffer, cpu))
2684                         continue;
2685
2686                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2687
2688                 /*
2689                  * Pick the entry with the smallest timestamp:
2690                  */
2691                 if (ent && (!next || ts < next_ts)) {
2692                         next = ent;
2693                         next_cpu = cpu;
2694                         next_ts = ts;
2695                         next_lost = lost_events;
2696                         next_size = iter->ent_size;
2697                 }
2698         }
2699
2700         iter->ent_size = next_size;
2701
2702         if (ent_cpu)
2703                 *ent_cpu = next_cpu;
2704
2705         if (ent_ts)
2706                 *ent_ts = next_ts;
2707
2708         if (missing_events)
2709                 *missing_events = next_lost;
2710
2711         return next;
2712 }
2713
2714 /* Find the next real entry, without updating the iterator itself */
2715 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2716                                           int *ent_cpu, u64 *ent_ts)
2717 {
2718         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2719 }
2720
2721 /* Find the next real entry, and increment the iterator to the next entry */
2722 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2723 {
2724         iter->ent = __find_next_entry(iter, &iter->cpu,
2725                                       &iter->lost_events, &iter->ts);
2726
2727         if (iter->ent)
2728                 trace_iterator_increment(iter);
2729
2730         return iter->ent ? iter : NULL;
2731 }
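
/*
 * Illustrative consumer loop (excluded from the build): this is
 * essentially how the seq_file iterator below walks the merged per-CPU
 * streams in timestamp order, printing one entry per step.
 */
#if 0
static void example_walk_entries(struct trace_iterator *iter)
{
        while (trace_find_next_entry_inc(iter))
                print_trace_line(iter);
}
#endif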
2732
2733 static void trace_consume(struct trace_iterator *iter)
2734 {
2735         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2736                             &iter->lost_events);
2737 }
2738
2739 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2740 {
2741         struct trace_iterator *iter = m->private;
2742         int i = (int)*pos;
2743         void *ent;
2744
2745         WARN_ON_ONCE(iter->leftover);
2746
2747         (*pos)++;
2748
2749         /* can't go backwards */
2750         if (iter->idx > i)
2751                 return NULL;
2752
2753         if (iter->idx < 0)
2754                 ent = trace_find_next_entry_inc(iter);
2755         else
2756                 ent = iter;
2757
2758         while (ent && iter->idx < i)
2759                 ent = trace_find_next_entry_inc(iter);
2760
2761         iter->pos = *pos;
2762
2763         return ent;
2764 }
2765
2766 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2767 {
2768         struct ring_buffer_event *event;
2769         struct ring_buffer_iter *buf_iter;
2770         unsigned long entries = 0;
2771         u64 ts;
2772
2773         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2774
2775         buf_iter = trace_buffer_iter(iter, cpu);
2776         if (!buf_iter)
2777                 return;
2778
2779         ring_buffer_iter_reset(buf_iter);
2780
2781         /*
2782          * With the max latency tracers, it is possible that a reset
2783          * never took place on a cpu. This shows up as timestamps
2784          * before the start of the buffer.
2785          */
2786         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2787                 if (ts >= iter->trace_buffer->time_start)
2788                         break;
2789                 entries++;
2790                 ring_buffer_read(buf_iter, NULL);
2791         }
2792
2793         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2794 }
2795
2796 /*
2797  * The current tracer is copied to avoid holding a global lock
2798  * all around.
2799  */
2800 static void *s_start(struct seq_file *m, loff_t *pos)
2801 {
2802         struct trace_iterator *iter = m->private;
2803         struct trace_array *tr = iter->tr;
2804         int cpu_file = iter->cpu_file;
2805         void *p = NULL;
2806         loff_t l = 0;
2807         int cpu;
2808
2809         /*
2810          * copy the tracer to avoid using a global lock all around.
2811          * iter->trace is a copy of current_trace, so the name pointer
2812          * may be compared instead of using strcmp(), as iter->trace->name
2813          * will point to the same string as current_trace->name.
2814          */
2815         mutex_lock(&trace_types_lock);
2816         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2817                 *iter->trace = *tr->current_trace;
2818         mutex_unlock(&trace_types_lock);
2819
2820 #ifdef CONFIG_TRACER_MAX_TRACE
2821         if (iter->snapshot && iter->trace->use_max_tr)
2822                 return ERR_PTR(-EBUSY);
2823 #endif
2824
2825         if (*pos != iter->pos) {
2826                 iter->ent = NULL;
2827                 iter->cpu = 0;
2828                 iter->idx = -1;
2829
2830                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2831                         for_each_tracing_cpu(cpu)
2832                                 tracing_iter_reset(iter, cpu);
2833                 } else
2834                         tracing_iter_reset(iter, cpu_file);
2835
2836                 iter->leftover = 0;
2837                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2838                         ;
2839
2840         } else {
2841                 /*
2842                  * If we overflowed the seq_file before, then we want
2843                  * to just reuse the trace_seq buffer again.
2844                  */
2845                 if (iter->leftover)
2846                         p = iter;
2847                 else {
2848                         l = *pos - 1;
2849                         p = s_next(m, p, &l);
2850                 }
2851         }
2852
2853         trace_event_read_lock();
2854         trace_access_lock(cpu_file);
2855         return p;
2856 }
2857
2858 static void s_stop(struct seq_file *m, void *p)
2859 {
2860         struct trace_iterator *iter = m->private;
2861
2862 #ifdef CONFIG_TRACER_MAX_TRACE
2863         if (iter->snapshot && iter->trace->use_max_tr)
2864                 return;
2865 #endif
2866
2867         trace_access_unlock(iter->cpu_file);
2868         trace_event_read_unlock();
2869 }
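
/*
 * Illustrative wiring (excluded from the build): s_start(), s_next()
 * and s_stop() above form the seq_file iterator for the "trace" file,
 * together with a ->show callback defined further below. The structure
 * name here is hypothetical.
 */
#if 0
static const struct seq_operations example_trace_seq_ops = {
        .start          = s_start,
        .next           = s_next,
        .stop           = s_stop,
        .show           = s_show,       /* defined later in this file */
};
#endif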
2870
2871 static void
2872 get_total_entries(struct trace_buffer *buf,
2873                   unsigned long *total, unsigned long *entries)
2874 {
2875         unsigned long count;
2876         int cpu;
2877
2878         *total = 0;
2879         *entries = 0;
2880
2881         for_each_tracing_cpu(cpu) {
2882                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2883                 /*
2884                  * If this buffer has skipped entries, then we hold all
2885                  * entries for the trace and we need to ignore the
2886                  * ones before the time stamp.
2887                  */
2888                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2889                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2890                         /* total is the same as the entries */
2891                         *total += count;
2892                 } else
2893                         *total += count +
2894                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2895                 *entries += count;
2896         }
2897 }
2898
2899 static void print_lat_help_header(struct seq_file *m)
2900 {
2901         seq_puts(m, "#                  _------=> CPU#            \n"
2902                     "#                 / _-----=> irqs-off        \n"
2903                     "#                | / _----=> need-resched    \n"
2904                     "#                || / _---=> hardirq/softirq \n"
2905                     "#                ||| / _--=> preempt-depth   \n"
2906                     "#                |||| /     delay            \n"
2907                     "#  cmd     pid   ||||| time  |   caller      \n"
2908                     "#     \\   /      |||||  \\    |   /         \n");
2909 }
2910
2911 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2912 {
2913         unsigned long total;
2914         unsigned long entries;
2915
2916         get_total_entries(buf, &total, &entries);
2917         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2918                    entries, total, num_online_cpus());
2919         seq_puts(m, "#\n");
2920 }
2921
2922 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2923 {
2924         print_event_info(buf, m);
2925         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2926                     "#              | |       |          |         |\n");
2927 }
2928
2929 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2930 {
2931         print_event_info(buf, m);
2932         seq_puts(m, "#                              _-----=> irqs-off\n"
2933                     "#                             / _----=> need-resched\n"
2934                     "#                            | / _---=> hardirq/softirq\n"
2935                     "#                            || / _--=> preempt-depth\n"
2936                     "#                            ||| /     delay\n"
2937                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2938                     "#              | |       |   ||||       |         |\n");
2939 }
2940
2941 void
2942 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2943 {
2944         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2945         struct trace_buffer *buf = iter->trace_buffer;
2946         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2947         struct tracer *type = iter->trace;
2948         unsigned long entries;
2949         unsigned long total;
2950         const char *name = "preemption";
2951
2952         name = type->name;
2953
2954         get_total_entries(buf, &total, &entries);
2955
2956         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2957                    name, UTS_RELEASE);
2958         seq_puts(m, "# -----------------------------------"
2959                  "---------------------------------\n");
2960         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2961                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2962                    nsecs_to_usecs(data->saved_latency),
2963                    entries,
2964                    total,
2965                    buf->cpu,
2966 #if defined(CONFIG_PREEMPT_NONE)
2967                    "server",
2968 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2969                    "desktop",
2970 #elif defined(CONFIG_PREEMPT)
2971                    "preempt",
2972 #else
2973                    "unknown",
2974 #endif
2975                    /* These are reserved for later use */
2976                    0, 0, 0, 0);
2977 #ifdef CONFIG_SMP
2978         seq_printf(m, " #P:%d)\n", num_online_cpus());
2979 #else
2980         seq_puts(m, ")\n");
2981 #endif
2982         seq_puts(m, "#    -----------------\n");
2983         seq_printf(m, "#    | task: %.16s-%d "
2984                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2985                    data->comm, data->pid,
2986                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2987                    data->policy, data->rt_priority);
2988         seq_puts(m, "#    -----------------\n");
2989
2990         if (data->critical_start) {
2991                 seq_puts(m, "#  => started at: ");
2992                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2993                 trace_print_seq(m, &iter->seq);
2994                 seq_puts(m, "\n#  => ended at:   ");
2995                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2996                 trace_print_seq(m, &iter->seq);
2997                 seq_puts(m, "\n#\n");
2998         }
2999
3000         seq_puts(m, "#\n");
3001 }
3002
3003 static void test_cpu_buff_start(struct trace_iterator *iter)
3004 {
3005         struct trace_seq *s = &iter->seq;
3006         struct trace_array *tr = iter->tr;
3007
3008         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3009                 return;
3010
3011         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3012                 return;
3013
3014         if (cpumask_available(iter->started) &&
3015             cpumask_test_cpu(iter->cpu, iter->started))
3016                 return;
3017
3018         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3019                 return;
3020
3021         if (cpumask_available(iter->started))
3022                 cpumask_set_cpu(iter->cpu, iter->started);
3023
3024         /* Don't print the "buffer started" message for the first entry of the trace */
3025         if (iter->idx > 1)
3026                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3027                                 iter->cpu);
3028 }
3029
3030 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3031 {
3032         struct trace_array *tr = iter->tr;
3033         struct trace_seq *s = &iter->seq;
3034         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3035         struct trace_entry *entry;
3036         struct trace_event *event;
3037
3038         entry = iter->ent;
3039
3040         test_cpu_buff_start(iter);
3041
3042         event = ftrace_find_event(entry->type);
3043
3044         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3045                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3046                         trace_print_lat_context(iter);
3047                 else
3048                         trace_print_context(iter);
3049         }
3050
3051         if (trace_seq_has_overflowed(s))
3052                 return TRACE_TYPE_PARTIAL_LINE;
3053
3054         if (event)
3055                 return event->funcs->trace(iter, sym_flags, event);
3056
3057         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3058
3059         return trace_handle_return(s);
3060 }
3061
3062 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3063 {
3064         struct trace_array *tr = iter->tr;
3065         struct trace_seq *s = &iter->seq;
3066         struct trace_entry *entry;
3067         struct trace_event *event;
3068
3069         entry = iter->ent;
3070
3071         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3072                 trace_seq_printf(s, "%d %d %llu ",
3073                                  entry->pid, iter->cpu, iter->ts);
3074
3075         if (trace_seq_has_overflowed(s))
3076                 return TRACE_TYPE_PARTIAL_LINE;
3077
3078         event = ftrace_find_event(entry->type);
3079         if (event)
3080                 return event->funcs->raw(iter, 0, event);
3081
3082         trace_seq_printf(s, "%d ?\n", entry->type);
3083
3084         return trace_handle_return(s);
3085 }
3086
3087 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3088 {
3089         struct trace_array *tr = iter->tr;
3090         struct trace_seq *s = &iter->seq;
3091         unsigned char newline = '\n';
3092         struct trace_entry *entry;
3093         struct trace_event *event;
3094
3095         entry = iter->ent;
3096
3097         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3098                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3099                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3100                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3101                 if (trace_seq_has_overflowed(s))
3102                         return TRACE_TYPE_PARTIAL_LINE;
3103         }
3104
3105         event = ftrace_find_event(entry->type);
3106         if (event) {
3107                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3108                 if (ret != TRACE_TYPE_HANDLED)
3109                         return ret;
3110         }
3111
3112         SEQ_PUT_FIELD(s, newline);
3113
3114         return trace_handle_return(s);
3115 }
3116
3117 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3118 {
3119         struct trace_array *tr = iter->tr;
3120         struct trace_seq *s = &iter->seq;
3121         struct trace_entry *entry;
3122         struct trace_event *event;
3123
3124         entry = iter->ent;
3125
3126         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3127                 SEQ_PUT_FIELD(s, entry->pid);
3128                 SEQ_PUT_FIELD(s, iter->cpu);
3129                 SEQ_PUT_FIELD(s, iter->ts);
3130                 if (trace_seq_has_overflowed(s))
3131                         return TRACE_TYPE_PARTIAL_LINE;
3132         }
3133
3134         event = ftrace_find_event(entry->type);
3135         return event ? event->funcs->binary(iter, 0, event) :
3136                 TRACE_TYPE_HANDLED;
3137 }
3138
3139 int trace_empty(struct trace_iterator *iter)
3140 {
3141         struct ring_buffer_iter *buf_iter;
3142         int cpu;
3143
3144         /* If we are looking at one CPU buffer, only check that one */
3145         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3146                 cpu = iter->cpu_file;
3147                 buf_iter = trace_buffer_iter(iter, cpu);
3148                 if (buf_iter) {
3149                         if (!ring_buffer_iter_empty(buf_iter))
3150                                 return 0;
3151                 } else {
3152                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3153                                 return 0;
3154                 }
3155                 return 1;
3156         }
3157
3158         for_each_tracing_cpu(cpu) {
3159                 buf_iter = trace_buffer_iter(iter, cpu);
3160                 if (buf_iter) {
3161                         if (!ring_buffer_iter_empty(buf_iter))
3162                                 return 0;
3163                 } else {
3164                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3165                                 return 0;
3166                 }
3167         }
3168
3169         return 1;
3170 }
3171
3172 /*  Called with trace_event_read_lock() held. */
3173 enum print_line_t print_trace_line(struct trace_iterator *iter)
3174 {
3175         struct trace_array *tr = iter->tr;
3176         unsigned long trace_flags = tr->trace_flags;
3177         enum print_line_t ret;
3178
3179         if (iter->lost_events) {
3180                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3181                                  iter->cpu, iter->lost_events);
3182                 if (trace_seq_has_overflowed(&iter->seq))
3183                         return TRACE_TYPE_PARTIAL_LINE;
3184         }
3185
3186         if (iter->trace && iter->trace->print_line) {
3187                 ret = iter->trace->print_line(iter);
3188                 if (ret != TRACE_TYPE_UNHANDLED)
3189                         return ret;
3190         }
3191
3192         if (iter->ent->type == TRACE_BPUTS &&
3193                         trace_flags & TRACE_ITER_PRINTK &&
3194                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3195                 return trace_print_bputs_msg_only(iter);
3196
3197         if (iter->ent->type == TRACE_BPRINT &&
3198                         trace_flags & TRACE_ITER_PRINTK &&
3199                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3200                 return trace_print_bprintk_msg_only(iter);
3201
3202         if (iter->ent->type == TRACE_PRINT &&
3203                         trace_flags & TRACE_ITER_PRINTK &&
3204                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3205                 return trace_print_printk_msg_only(iter);
3206
3207         if (trace_flags & TRACE_ITER_BIN)
3208                 return print_bin_fmt(iter);
3209
3210         if (trace_flags & TRACE_ITER_HEX)
3211                 return print_hex_fmt(iter);
3212
3213         if (trace_flags & TRACE_ITER_RAW)
3214                 return print_raw_fmt(iter);
3215
3216         return print_trace_fmt(iter);
3217 }
3218
3219 void trace_latency_header(struct seq_file *m)
3220 {
3221         struct trace_iterator *iter = m->private;
3222         struct trace_array *tr = iter->tr;
3223
3224         /* print nothing if the buffers are empty */
3225         if (trace_empty(iter))
3226                 return;
3227
3228         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3229                 print_trace_header(m, iter);
3230
3231         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3232                 print_lat_help_header(m);
3233 }
3234
3235 void trace_default_header(struct seq_file *m)
3236 {
3237         struct trace_iterator *iter = m->private;
3238         struct trace_array *tr = iter->tr;
3239         unsigned long trace_flags = tr->trace_flags;
3240
3241         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3242                 return;
3243
3244         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3245                 /* print nothing if the buffers are empty */
3246                 if (trace_empty(iter))
3247                         return;
3248                 print_trace_header(m, iter);
3249                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3250                         print_lat_help_header(m);
3251         } else {
3252                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3253                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3254                                 print_func_help_header_irq(iter->trace_buffer, m);
3255                         else
3256                                 print_func_help_header(iter->trace_buffer, m);
3257                 }
3258         }
3259 }
3260
3261 static void test_ftrace_alive(struct seq_file *m)
3262 {
3263         if (!ftrace_is_dead())
3264                 return;
3265         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3266                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3267 }
3268
3269 #ifdef CONFIG_TRACER_MAX_TRACE
3270 static void show_snapshot_main_help(struct seq_file *m)
3271 {
3272         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3273                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3274                     "#                      Takes a snapshot of the main buffer.\n"
3275                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3276                     "#                      (Doesn't have to be '2', works with any number that\n"
3277                     "#                       is not a '0' or '1')\n");
3278 }
3279
3280 static void show_snapshot_percpu_help(struct seq_file *m)
3281 {
3282         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3283 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3284         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3285                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3286 #else
3287         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3288                     "#                     Must use main snapshot file to allocate.\n");
3289 #endif
3290         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3291                     "#                      (Doesn't have to be '2', works with any number that\n"
3292                     "#                       is not a '0' or '1')\n");
3293 }
3294
3295 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3296 {
3297         if (iter->tr->allocated_snapshot)
3298                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3299         else
3300                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3301
3302         seq_puts(m, "# Snapshot commands:\n");
3303         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3304                 show_snapshot_main_help(m);
3305         else
3306                 show_snapshot_percpu_help(m);
3307 }
3308 #else
3309 /* Should never be called */
3310 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3311 #endif
3312
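/*
 * seq_file ->show() callback used by __tracing_open().  A NULL iter->ent
 * means we are at the start of the output, so print the tracer banner and
 * the appropriate header (or the snapshot help text).  A set
 * iter->leftover means the previously formatted line did not fit in the
 * seq_file buffer and is flushed first; otherwise format the next trace
 * entry and remember in iter->leftover whether it overflowed so that it
 * can be retried on the next call.
 */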
3313 static int s_show(struct seq_file *m, void *v)
3314 {
3315         struct trace_iterator *iter = v;
3316         int ret;
3317
3318         if (iter->ent == NULL) {
3319                 if (iter->tr) {
3320                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3321                         seq_puts(m, "#\n");
3322                         test_ftrace_alive(m);
3323                 }
3324                 if (iter->snapshot && trace_empty(iter))
3325                         print_snapshot_help(m, iter);
3326                 else if (iter->trace && iter->trace->print_header)
3327                         iter->trace->print_header(m);
3328                 else
3329                         trace_default_header(m);
3330
3331         } else if (iter->leftover) {
3332                 /*
3333                  * If we filled the seq_file buffer earlier, we
3334                  * want to just show it now.
3335                  */
3336                 ret = trace_print_seq(m, &iter->seq);
3337
3338                 /* ret should this time be zero, but you never know */
3339                 iter->leftover = ret;
3340
3341         } else {
3342                 print_trace_line(iter);
3343                 ret = trace_print_seq(m, &iter->seq);
3344                 /*
3345                  * If we overflow the seq_file buffer, then it will
3346                  * ask us for this data again at start up.
3347                  * Use that instead.
3348                  *  ret is 0 if seq_file write succeeded.
3349                  *        -1 otherwise.
3350                  */
3351                 iter->leftover = ret;
3352         }
3353
3354         return 0;
3355 }
3356
3357 /*
3358  * Should be used after trace_array_get(), trace_types_lock
3359  * ensures that i_cdev was already initialized.
3360  */
3361 static inline int tracing_get_cpu(struct inode *inode)
3362 {
3363         if (inode->i_cdev) /* See trace_create_cpu_file() */
3364                 return (long)inode->i_cdev - 1;
3365         return RING_BUFFER_ALL_CPUS;
3366 }
3367
3368 static const struct seq_operations tracer_seq_ops = {
3369         .start          = s_start,
3370         .next           = s_next,
3371         .stop           = s_stop,
3372         .show           = s_show,
3373 };
3374
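/*
 * __tracing_open - set up a trace_iterator for reading a trace buffer.
 * Allocates the iterator as seq_file private data, takes a private copy
 * of the current tracer so concurrent tracer switches cannot change it
 * under the reader, points the iterator at either the main buffer or the
 * max/snapshot buffer, and prepares a ring buffer iterator for each CPU
 * (or only for the CPU encoded in the inode).  Unless the snapshot is
 * being opened, tracing on this array is stopped for the duration of the
 * read and restarted from tracing_release().
 */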
3375 static struct trace_iterator *
3376 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3377 {
3378         struct trace_array *tr = inode->i_private;
3379         struct trace_iterator *iter;
3380         int cpu;
3381
3382         if (tracing_disabled)
3383                 return ERR_PTR(-ENODEV);
3384
3385         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3386         if (!iter)
3387                 return ERR_PTR(-ENOMEM);
3388
3389         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3390                                     GFP_KERNEL);
3391         if (!iter->buffer_iter)
3392                 goto release;
3393
3394         /*
3395          * We make a copy of the current tracer to avoid concurrent
3396          * changes on it while we are reading.
3397          */
3398         mutex_lock(&trace_types_lock);
3399         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3400         if (!iter->trace)
3401                 goto fail;
3402
3403         *iter->trace = *tr->current_trace;
3404
3405         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3406                 goto fail;
3407
3408         iter->tr = tr;
3409
3410 #ifdef CONFIG_TRACER_MAX_TRACE
3411         /* Currently only the top directory has a snapshot */
3412         if (tr->current_trace->print_max || snapshot)
3413                 iter->trace_buffer = &tr->max_buffer;
3414         else
3415 #endif
3416                 iter->trace_buffer = &tr->trace_buffer;
3417         iter->snapshot = snapshot;
3418         iter->pos = -1;
3419         iter->cpu_file = tracing_get_cpu(inode);
3420         mutex_init(&iter->mutex);
3421
3422         /* Notify the tracer early; before we stop tracing. */
3423         if (iter->trace && iter->trace->open)
3424                 iter->trace->open(iter);
3425
3426         /* Annotate start of buffers if we had overruns */
3427         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3428                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3429
3430         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3431         if (trace_clocks[tr->clock_id].in_ns)
3432                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3433
3434         /* stop the trace while dumping if we are not opening "snapshot" */
3435         if (!iter->snapshot)
3436                 tracing_stop_tr(tr);
3437
3438         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3439                 for_each_tracing_cpu(cpu) {
3440                         iter->buffer_iter[cpu] =
3441                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3442                                                          cpu, GFP_KERNEL);
3443                 }
3444                 ring_buffer_read_prepare_sync();
3445                 for_each_tracing_cpu(cpu) {
3446                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3447                         tracing_iter_reset(iter, cpu);
3448                 }
3449         } else {
3450                 cpu = iter->cpu_file;
3451                 iter->buffer_iter[cpu] =
3452                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3453                                                  cpu, GFP_KERNEL);
3454                 ring_buffer_read_prepare_sync();
3455                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3456                 tracing_iter_reset(iter, cpu);
3457         }
3458
3459         mutex_unlock(&trace_types_lock);
3460
3461         return iter;
3462
3463  fail:
3464         mutex_unlock(&trace_types_lock);
3465         kfree(iter->trace);
3466         kfree(iter->buffer_iter);
3467 release:
3468         seq_release_private(inode, file);
3469         return ERR_PTR(-ENOMEM);
3470 }
3471
3472 int tracing_open_generic(struct inode *inode, struct file *filp)
3473 {
3474         if (tracing_disabled)
3475                 return -ENODEV;
3476
3477         filp->private_data = inode->i_private;
3478         return 0;
3479 }
3480
3481 bool tracing_is_disabled(void)
3482 {
3483         return (tracing_disabled) ? true : false;
3484 }
3485
3486 /*
3487  * Open and update trace_array ref count.
3488  * Must have the current trace_array passed to it.
3489  */
3490 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3491 {
3492         struct trace_array *tr = inode->i_private;
3493
3494         if (tracing_disabled)
3495                 return -ENODEV;
3496
3497         if (trace_array_get(tr) < 0)
3498                 return -ENODEV;
3499
3500         filp->private_data = inode->i_private;
3501
3502         return 0;
3503 }
3504
3505 static int tracing_release(struct inode *inode, struct file *file)
3506 {
3507         struct trace_array *tr = inode->i_private;
3508         struct seq_file *m = file->private_data;
3509         struct trace_iterator *iter;
3510         int cpu;
3511
3512         if (!(file->f_mode & FMODE_READ)) {
3513                 trace_array_put(tr);
3514                 return 0;
3515         }
3516
3517         /* Writes do not use seq_file */
3518         iter = m->private;
3519         mutex_lock(&trace_types_lock);
3520
3521         for_each_tracing_cpu(cpu) {
3522                 if (iter->buffer_iter[cpu])
3523                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3524         }
3525
3526         if (iter->trace && iter->trace->close)
3527                 iter->trace->close(iter);
3528
3529         if (!iter->snapshot)
3530                 /* reenable tracing if it was previously enabled */
3531                 tracing_start_tr(tr);
3532
3533         __trace_array_put(tr);
3534
3535         mutex_unlock(&trace_types_lock);
3536
3537         mutex_destroy(&iter->mutex);
3538         free_cpumask_var(iter->started);
3539         kfree(iter->trace);
3540         kfree(iter->buffer_iter);
3541         seq_release_private(inode, file);
3542
3543         return 0;
3544 }
3545
3546 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3547 {
3548         struct trace_array *tr = inode->i_private;
3549
3550         trace_array_put(tr);
3551         return 0;
3552 }
3553
3554 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3555 {
3556         struct trace_array *tr = inode->i_private;
3557
3558         trace_array_put(tr);
3559
3560         return single_release(inode, file);
3561 }
3562
3563 static int tracing_open(struct inode *inode, struct file *file)
3564 {
3565         struct trace_array *tr = inode->i_private;
3566         struct trace_iterator *iter;
3567         int ret = 0;
3568
3569         if (trace_array_get(tr) < 0)
3570                 return -ENODEV;
3571
3572         /* If this file was open for write, then erase contents */
3573         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3574                 int cpu = tracing_get_cpu(inode);
3575                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3576
3577 #ifdef CONFIG_TRACER_MAX_TRACE
3578                 if (tr->current_trace->print_max)
3579                         trace_buf = &tr->max_buffer;
3580 #endif
3581
3582                 if (cpu == RING_BUFFER_ALL_CPUS)
3583                         tracing_reset_online_cpus(trace_buf);
3584                 else
3585                         tracing_reset(trace_buf, cpu);
3586         }
3587
3588         if (file->f_mode & FMODE_READ) {
3589                 iter = __tracing_open(inode, file, false);
3590                 if (IS_ERR(iter))
3591                         ret = PTR_ERR(iter);
3592                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3593                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3594         }
3595
3596         if (ret < 0)
3597                 trace_array_put(tr);
3598
3599         return ret;
3600 }
3601
3602 /*
3603  * Some tracers are not suitable for instance buffers.
3604  * A tracer is always available for the global array (toplevel)
3605  * or if it explicitly states that it is.
3606  */
3607 static bool
3608 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3609 {
3610         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3611 }
3612
3613 /* Find the next tracer that this trace array may use */
3614 static struct tracer *
3615 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3616 {
3617         while (t && !trace_ok_for_array(t, tr))
3618                 t = t->next;
3619
3620         return t;
3621 }
3622
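/*
 * seq_file iteration over the registered tracers (the trace_types list),
 * skipping any tracer that trace_ok_for_array() rejects for this
 * instance.  t_show() emits the remaining names space-separated on one
 * line; this is what the available_tracers listing mentioned in the
 * readme below is built from.
 */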
3623 static void *
3624 t_next(struct seq_file *m, void *v, loff_t *pos)
3625 {
3626         struct trace_array *tr = m->private;
3627         struct tracer *t = v;
3628
3629         (*pos)++;
3630
3631         if (t)
3632                 t = get_tracer_for_array(tr, t->next);
3633
3634         return t;
3635 }
3636
3637 static void *t_start(struct seq_file *m, loff_t *pos)
3638 {
3639         struct trace_array *tr = m->private;
3640         struct tracer *t;
3641         loff_t l = 0;
3642
3643         mutex_lock(&trace_types_lock);
3644
3645         t = get_tracer_for_array(tr, trace_types);
3646         for (; t && l < *pos; t = t_next(m, t, &l))
3647                         ;
3648
3649         return t;
3650 }
3651
3652 static void t_stop(struct seq_file *m, void *p)
3653 {
3654         mutex_unlock(&trace_types_lock);
3655 }
3656
3657 static int t_show(struct seq_file *m, void *v)
3658 {
3659         struct tracer *t = v;
3660
3661         if (!t)
3662                 return 0;
3663
3664         seq_puts(m, t->name);
3665         if (t->next)
3666                 seq_putc(m, ' ');
3667         else
3668                 seq_putc(m, '\n');
3669
3670         return 0;
3671 }
3672
3673 static const struct seq_operations show_traces_seq_ops = {
3674         .start          = t_start,
3675         .next           = t_next,
3676         .stop           = t_stop,
3677         .show           = t_show,
3678 };
3679
3680 static int show_traces_open(struct inode *inode, struct file *file)
3681 {
3682         struct trace_array *tr = inode->i_private;
3683         struct seq_file *m;
3684         int ret;
3685
3686         if (tracing_disabled)
3687                 return -ENODEV;
3688
3689         if (trace_array_get(tr) < 0)
3690                 return -ENODEV;
3691
3692         ret = seq_open(file, &show_traces_seq_ops);
3693         if (ret) {
3694                 trace_array_put(tr);
3695                 return ret;
3696         }
3697
3698         m = file->private_data;
3699         m->private = tr;
3700
3701         return 0;
3702 }
3703
3704 static int show_traces_release(struct inode *inode, struct file *file)
3705 {
3706         struct trace_array *tr = inode->i_private;
3707
3708         trace_array_put(tr);
3709         return seq_release(inode, file);
3710 }
3711
3712 static ssize_t
3713 tracing_write_stub(struct file *filp, const char __user *ubuf,
3714                    size_t count, loff_t *ppos)
3715 {
3716         return count;
3717 }
3718
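/*
 * Readers go through seq_file, so let seq_lseek() handle them; writers
 * do not use seq_file at all, so simply reset the file position to zero.
 */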
3719 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3720 {
3721         int ret;
3722
3723         if (file->f_mode & FMODE_READ)
3724                 ret = seq_lseek(file, offset, whence);
3725         else
3726                 file->f_pos = ret = 0;
3727
3728         return ret;
3729 }
3730
3731 static const struct file_operations tracing_fops = {
3732         .open           = tracing_open,
3733         .read           = seq_read,
3734         .write          = tracing_write_stub,
3735         .llseek         = tracing_lseek,
3736         .release        = tracing_release,
3737 };
3738
3739 static const struct file_operations show_traces_fops = {
3740         .open           = show_traces_open,
3741         .read           = seq_read,
3742         .llseek         = seq_lseek,
3743         .release        = show_traces_release,
3744 };
3745
3746 static ssize_t
3747 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3748                      size_t count, loff_t *ppos)
3749 {
3750         struct trace_array *tr = file_inode(filp)->i_private;
3751         char *mask_str;
3752         int len;
3753
3754         len = snprintf(NULL, 0, "%*pb\n",
3755                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3756         mask_str = kmalloc(len, GFP_KERNEL);
3757         if (!mask_str)
3758                 return -ENOMEM;
3759
3760         len = snprintf(mask_str, len, "%*pb\n",
3761                        cpumask_pr_args(tr->tracing_cpumask));
3762         if (len >= count) {
3763                 count = -EINVAL;
3764                 goto out_err;
3765         }
3766         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3767
3768 out_err:
3769         kfree(mask_str);
3770
3771         return count;
3772 }
3773
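/*
 * Writing the tracing cpumask: parse the user-supplied mask, then, with
 * interrupts off and max_lock held, bump the per-CPU "disabled" count and
 * stop ring buffer recording for every CPU being removed from the mask
 * (and do the reverse for every CPU being added) before copying the new
 * mask over the old one.
 */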
3774 static ssize_t
3775 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3776                       size_t count, loff_t *ppos)
3777 {
3778         struct trace_array *tr = file_inode(filp)->i_private;
3779         cpumask_var_t tracing_cpumask_new;
3780         int err, cpu;
3781
3782         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3783                 return -ENOMEM;
3784
3785         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3786         if (err)
3787                 goto err_unlock;
3788
3789         local_irq_disable();
3790         arch_spin_lock(&tr->max_lock);
3791         for_each_tracing_cpu(cpu) {
3792                 /*
3793                  * Increase/decrease the disabled counter if we are
3794                  * about to flip a bit in the cpumask:
3795                  */
3796                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3797                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3798                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3799                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3800                 }
3801                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3802                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3803                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3804                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3805                 }
3806         }
3807         arch_spin_unlock(&tr->max_lock);
3808         local_irq_enable();
3809
3810         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3811         free_cpumask_var(tracing_cpumask_new);
3812
3813         return count;
3814
3815 err_unlock:
3816         free_cpumask_var(tracing_cpumask_new);
3817
3818         return err;
3819 }
3820
3821 static const struct file_operations tracing_cpumask_fops = {
3822         .open           = tracing_open_generic_tr,
3823         .read           = tracing_cpumask_read,
3824         .write          = tracing_cpumask_write,
3825         .release        = tracing_release_generic_tr,
3826         .llseek         = generic_file_llseek,
3827 };
3828
3829 static int tracing_trace_options_show(struct seq_file *m, void *v)
3830 {
3831         struct tracer_opt *trace_opts;
3832         struct trace_array *tr = m->private;
3833         u32 tracer_flags;
3834         int i;
3835
3836         mutex_lock(&trace_types_lock);
3837         tracer_flags = tr->current_trace->flags->val;
3838         trace_opts = tr->current_trace->flags->opts;
3839
3840         for (i = 0; trace_options[i]; i++) {
3841                 if (tr->trace_flags & (1 << i))
3842                         seq_printf(m, "%s\n", trace_options[i]);
3843                 else
3844                         seq_printf(m, "no%s\n", trace_options[i]);
3845         }
3846
3847         for (i = 0; trace_opts[i].name; i++) {
3848                 if (tracer_flags & trace_opts[i].bit)
3849                         seq_printf(m, "%s\n", trace_opts[i].name);
3850                 else
3851                         seq_printf(m, "no%s\n", trace_opts[i].name);
3852         }
3853         mutex_unlock(&trace_types_lock);
3854
3855         return 0;
3856 }
3857
3858 static int __set_tracer_option(struct trace_array *tr,
3859                                struct tracer_flags *tracer_flags,
3860                                struct tracer_opt *opts, int neg)
3861 {
3862         struct tracer *trace = tracer_flags->trace;
3863         int ret;
3864
3865         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3866         if (ret)
3867                 return ret;
3868
3869         if (neg)
3870                 tracer_flags->val &= ~opts->bit;
3871         else
3872                 tracer_flags->val |= opts->bit;
3873         return 0;
3874 }
3875
3876 /* Try to assign a tracer specific option */
3877 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3878 {
3879         struct tracer *trace = tr->current_trace;
3880         struct tracer_flags *tracer_flags = trace->flags;
3881         struct tracer_opt *opts = NULL;
3882         int i;
3883
3884         for (i = 0; tracer_flags->opts[i].name; i++) {
3885                 opts = &tracer_flags->opts[i];
3886
3887                 if (strcmp(cmp, opts->name) == 0)
3888                         return __set_tracer_option(tr, trace->flags, opts, neg);
3889         }
3890
3891         return -EINVAL;
3892 }
3893
3894 /* Some tracers require overwrite to stay enabled */
3895 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3896 {
3897         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3898                 return -1;
3899
3900         return 0;
3901 }
3902
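/*
 * set_tracer_flag - set or clear one bit in tr->trace_flags.
 * Does nothing if the flag already has the requested state.  The current
 * tracer may veto the change through its ->flag_changed() callback.  A
 * few flags carry side effects handled here: RECORD_CMD toggles comm
 * recording, EVENT_FORK toggles following forked tasks, OVERWRITE is
 * propagated to the ring buffer(s), and PRINTK starts or stops
 * trace_printk() output.
 */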
3903 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3904 {
3905         /* do nothing if the flag already has the requested state */
3906         if (!!(tr->trace_flags & mask) == !!enabled)
3907                 return 0;
3908
3909         /* Give the tracer a chance to approve the change */
3910         if (tr->current_trace->flag_changed)
3911                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3912                         return -EINVAL;
3913
3914         if (enabled)
3915                 tr->trace_flags |= mask;
3916         else
3917                 tr->trace_flags &= ~mask;
3918
3919         if (mask == TRACE_ITER_RECORD_CMD)
3920                 trace_event_enable_cmd_record(enabled);
3921
3922         if (mask == TRACE_ITER_EVENT_FORK)
3923                 trace_event_follow_fork(tr, enabled);
3924
3925         if (mask == TRACE_ITER_OVERWRITE) {
3926                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3927 #ifdef CONFIG_TRACER_MAX_TRACE
3928                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3929 #endif
3930         }
3931
3932         if (mask == TRACE_ITER_PRINTK) {
3933                 trace_printk_start_stop_comm(enabled);
3934                 trace_printk_control(enabled);
3935         }
3936
3937         return 0;
3938 }
3939
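/*
 * trace_set_options - apply a single option token; a leading "no" clears
 * the flag instead of setting it.  The core trace_options[] names are
 * tried first, then the current tracer's private options.  The '\0' that
 * strstrip() may have written over trailing whitespace is turned back
 * into a space, presumably so a caller-owned buffer (such as the boot
 * option string) can be parsed again later.
 */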
3940 static int trace_set_options(struct trace_array *tr, char *option)
3941 {
3942         char *cmp;
3943         int neg = 0;
3944         int ret = -ENODEV;
3945         int i;
3946         size_t orig_len = strlen(option);
3947
3948         cmp = strstrip(option);
3949
3950         if (strncmp(cmp, "no", 2) == 0) {
3951                 neg = 1;
3952                 cmp += 2;
3953         }
3954
3955         mutex_lock(&trace_types_lock);
3956
3957         for (i = 0; trace_options[i]; i++) {
3958                 if (strcmp(cmp, trace_options[i]) == 0) {
3959                         ret = set_tracer_flag(tr, 1 << i, !neg);
3960                         break;
3961                 }
3962         }
3963
3964         /* If no option could be set, test the specific tracer options */
3965         if (!trace_options[i])
3966                 ret = set_tracer_option(tr, cmp, neg);
3967
3968         mutex_unlock(&trace_types_lock);
3969
3970         /*
3971          * If the first trailing whitespace is replaced with '\0' by strstrip,
3972          * turn it back into a space.
3973          */
3974         if (orig_len > strlen(option))
3975                 option[strlen(option)] = ' ';
3976
3977         return ret;
3978 }
3979
3980 static void __init apply_trace_boot_options(void)
3981 {
3982         char *buf = trace_boot_options_buf;
3983         char *option;
3984
3985         while (true) {
3986                 option = strsep(&buf, ",");
3987
3988                 if (!option)
3989                         break;
3990
3991                 if (*option)
3992                         trace_set_options(&global_trace, option);
3993
3994                 /* Put back the comma to allow this to be called again */
3995                 if (buf)
3996                         *(buf - 1) = ',';
3997         }
3998 }
3999
4000 static ssize_t
4001 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4002                         size_t cnt, loff_t *ppos)
4003 {
4004         struct seq_file *m = filp->private_data;
4005         struct trace_array *tr = m->private;
4006         char buf[64];
4007         int ret;
4008
4009         if (cnt >= sizeof(buf))
4010                 return -EINVAL;
4011
4012         if (copy_from_user(buf, ubuf, cnt))
4013                 return -EFAULT;
4014
4015         buf[cnt] = 0;
4016
4017         ret = trace_set_options(tr, buf);
4018         if (ret < 0)
4019                 return ret;
4020
4021         *ppos += cnt;
4022
4023         return cnt;
4024 }
4025
4026 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4027 {
4028         struct trace_array *tr = inode->i_private;
4029         int ret;
4030
4031         if (tracing_disabled)
4032                 return -ENODEV;
4033
4034         if (trace_array_get(tr) < 0)
4035                 return -ENODEV;
4036
4037         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4038         if (ret < 0)
4039                 trace_array_put(tr);
4040
4041         return ret;
4042 }
4043
4044 static const struct file_operations tracing_iter_fops = {
4045         .open           = tracing_trace_options_open,
4046         .read           = seq_read,
4047         .llseek         = seq_lseek,
4048         .release        = tracing_single_release_tr,
4049         .write          = tracing_trace_options_write,
4050 };
4051
4052 static const char readme_msg[] =
4053         "tracing mini-HOWTO:\n\n"
4054         "# echo 0 > tracing_on : quick way to disable tracing\n"
4055         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4056         " Important files:\n"
4057         "  trace\t\t\t- The static contents of the buffer\n"
4058         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4059         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4060         "  current_tracer\t- function and latency tracers\n"
4061         "  available_tracers\t- list of configured tracers for current_tracer\n"
4062         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4063         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4064         "  trace_clock\t\t- change the clock used to order events\n"
4065         "       local:   Per cpu clock but may not be synced across CPUs\n"
4066         "      global:   Synced across CPUs but slows tracing down.\n"
4067         "     counter:   Not a clock, but just an increment\n"
4068         "      uptime:   Jiffy counter from time of boot\n"
4069         "        perf:   Same clock that perf events use\n"
4070 #ifdef CONFIG_X86_64
4071         "     x86-tsc:   TSC cycle counter\n"
4072 #endif
4073         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4074         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4075         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4076         "\t\t\t  Remove sub-buffer with rmdir\n"
4077         "  trace_options\t\t- Set format or modify how tracing happens\n"
4078         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4079         "\t\t\t  option name\n"
4080         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4081 #ifdef CONFIG_DYNAMIC_FTRACE
4082         "\n  available_filter_functions - list of functions that can be filtered on\n"
4083         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4084         "\t\t\t  functions\n"
4085         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4086         "\t     modules: Can select a group via module\n"
4087         "\t      Format: :mod:<module-name>\n"
4088         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4089         "\t    triggers: a command to perform when function is hit\n"
4090         "\t      Format: <function>:<trigger>[:count]\n"
4091         "\t     trigger: traceon, traceoff\n"
4092         "\t\t      enable_event:<system>:<event>\n"
4093         "\t\t      disable_event:<system>:<event>\n"
4094 #ifdef CONFIG_STACKTRACE
4095         "\t\t      stacktrace\n"
4096 #endif
4097 #ifdef CONFIG_TRACER_SNAPSHOT
4098         "\t\t      snapshot\n"
4099 #endif
4100         "\t\t      dump\n"
4101         "\t\t      cpudump\n"
4102         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4103         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4104         "\t     The first one will disable tracing every time do_fault is hit\n"
4105         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4106         "\t       The first time do_trap is hit and it disables tracing, the\n"
4107         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4108         "\t       the counter will not decrement. It only decrements when the\n"
4109         "\t       trigger did work\n"
4110         "\t     To remove trigger without count:\n"
4111         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4112         "\t     To remove trigger with a count:\n"
4113         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4114         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4115         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4116         "\t    modules: Can select a group via module command :mod:\n"
4117         "\t    Does not accept triggers\n"
4118 #endif /* CONFIG_DYNAMIC_FTRACE */
4119 #ifdef CONFIG_FUNCTION_TRACER
4120         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4121         "\t\t    (function)\n"
4122 #endif
4123 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4124         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4125         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4126         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4127 #endif
4128 #ifdef CONFIG_TRACER_SNAPSHOT
4129         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4130         "\t\t\t  snapshot buffer. Read the contents for more\n"
4131         "\t\t\t  information\n"
4132 #endif
4133 #ifdef CONFIG_STACK_TRACER
4134         "  stack_trace\t\t- Shows the max stack trace when active\n"
4135         "  stack_max_size\t- Shows current max stack size that was traced\n"
4136         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4137         "\t\t\t  new trace)\n"
4138 #ifdef CONFIG_DYNAMIC_FTRACE
4139         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4140         "\t\t\t  traces\n"
4141 #endif
4142 #endif /* CONFIG_STACK_TRACER */
4143 #ifdef CONFIG_KPROBE_EVENT
4144         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4145         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4146 #endif
4147 #ifdef CONFIG_UPROBE_EVENT
4148         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4149         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4150 #endif
4151 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4152         "\t  accepts: event-definitions (one definition per line)\n"
4153         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4154         "\t           -:[<group>/]<event>\n"
4155 #ifdef CONFIG_KPROBE_EVENT
4156         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4157 #endif
4158 #ifdef CONFIG_UPROBE_EVENT
4159         "\t    place: <path>:<offset>\n"
4160 #endif
4161         "\t     args: <name>=fetcharg[:type]\n"
4162         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4163         "\t           $stack<index>, $stack, $retval, $comm\n"
4164         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4165         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4166 #endif
4167         "  events/\t\t- Directory containing all trace event subsystems:\n"
4168         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4169         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4170         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4171         "\t\t\t  events\n"
4172         "      filter\t\t- If set, only events passing filter are traced\n"
4173         "  events/<system>/<event>/\t- Directory containing control files for\n"
4174         "\t\t\t  <event>:\n"
4175         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4176         "      filter\t\t- If set, only events passing filter are traced\n"
4177         "      trigger\t\t- If set, a command to perform when event is hit\n"
4178         "\t    Format: <trigger>[:count][if <filter>]\n"
4179         "\t   trigger: traceon, traceoff\n"
4180         "\t            enable_event:<system>:<event>\n"
4181         "\t            disable_event:<system>:<event>\n"
4182 #ifdef CONFIG_HIST_TRIGGERS
4183         "\t            enable_hist:<system>:<event>\n"
4184         "\t            disable_hist:<system>:<event>\n"
4185 #endif
4186 #ifdef CONFIG_STACKTRACE
4187         "\t\t    stacktrace\n"
4188 #endif
4189 #ifdef CONFIG_TRACER_SNAPSHOT
4190         "\t\t    snapshot\n"
4191 #endif
4192 #ifdef CONFIG_HIST_TRIGGERS
4193         "\t\t    hist (see below)\n"
4194 #endif
4195         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4196         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4197         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4198         "\t                  events/block/block_unplug/trigger\n"
4199         "\t   The first disables tracing every time block_unplug is hit.\n"
4200         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4201         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4202         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4203         "\t   Like function triggers, the counter is only decremented if it\n"
4204         "\t    enabled or disabled tracing.\n"
4205         "\t   To remove a trigger without a count:\n"
4206         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4207         "\t   To remove a trigger with a count:\n"
4208         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4209         "\t   Filters can be ignored when removing a trigger.\n"
4210 #ifdef CONFIG_HIST_TRIGGERS
4211         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4212         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4213         "\t            [:values=<field1[,field2,...]>]\n"
4214         "\t            [:sort=<field1[,field2,...]>]\n"
4215         "\t            [:size=#entries]\n"
4216         "\t            [:pause][:continue][:clear]\n"
4217         "\t            [:name=histname1]\n"
4218         "\t            [if <filter>]\n\n"
4219         "\t    When a matching event is hit, an entry is added to a hash\n"
4220         "\t    table using the key(s) and value(s) named, and the value of a\n"
4221         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4222         "\t    correspond to fields in the event's format description.  Keys\n"
4223         "\t    can be any field, or the special string 'stacktrace'.\n"
4224         "\t    Compound keys consisting of up to two fields can be specified\n"
4225         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4226         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4227         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4228         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4229         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4230         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4231         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4232         "\t    its histogram data will be shared with other triggers of the\n"
4233         "\t    same name, and trigger hits will update this common data.\n\n"
4234         "\t    Reading the 'hist' file for the event will dump the hash\n"
4235         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4236         "\t    triggers attached to an event, there will be a table for each\n"
4237         "\t    trigger in the output.  The table displayed for a named\n"
4238         "\t    trigger will be the same as any other instance having the\n"
4239         "\t    same name.  The default format used to display a given field\n"
4240         "\t    can be modified by appending any of the following modifiers\n"
4241         "\t    to the field name, as applicable:\n\n"
4242         "\t            .hex        display a number as a hex value\n"
4243         "\t            .sym        display an address as a symbol\n"
4244         "\t            .sym-offset display an address as a symbol and offset\n"
4245         "\t            .execname   display a common_pid as a program name\n"
4246         "\t            .syscall    display a syscall id as a syscall name\n"
4247         "\t            .log2       display log2 value rather than raw number\n\n"
4248         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4249         "\t    trigger or to start a hist trigger but not log any events\n"
4250         "\t    until told to do so.  'continue' can be used to start or\n"
4251         "\t    restart a paused hist trigger.\n\n"
4252         "\t    The 'clear' parameter will clear the contents of a running\n"
4253         "\t    hist trigger and leave its current paused/active state\n"
4254         "\t    unchanged.\n\n"
4255         "\t    The enable_hist and disable_hist triggers can be used to\n"
4256         "\t    have one event conditionally start and stop another event's\n"
4257         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4258         "\t    the enable_event and disable_event triggers.\n"
4259 #endif
4260 ;
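/*
 * Illustrative tracefs session following the mini-HOWTO above.  This is
 * only a sketch, not part of the kernel: it assumes tracefs is mounted at
 * the usual /sys/kernel/debug/tracing location and that the function
 * tracer is built in.
 *
 *   cd /sys/kernel/debug/tracing
 *   cat available_tracers            # tracers accepted by current_tracer
 *   echo function > current_tracer   # pick a tracer
 *   echo 1 > tracing_on              # quick way to re-enable tracing
 *   cat trace_pipe                   # consuming read of the buffer
 *   echo 0 > tracing_on              # quick way to disable tracing
 *   echo > trace                     # clear the buffer
 */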
4261
4262 static ssize_t
4263 tracing_readme_read(struct file *filp, char __user *ubuf,
4264                        size_t cnt, loff_t *ppos)
4265 {
4266         return simple_read_from_buffer(ubuf, cnt, ppos,
4267                                         readme_msg, strlen(readme_msg));
4268 }
4269
4270 static const struct file_operations tracing_readme_fops = {
4271         .open           = tracing_open_generic,
4272         .read           = tracing_readme_read,
4273         .llseek         = generic_file_llseek,
4274 };
4275
4276 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4277 {
4278         unsigned int *ptr = v;
4279
4280         if (*pos || m->count)
4281                 ptr++;
4282
4283         (*pos)++;
4284
4285         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4286              ptr++) {
4287                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4288                         continue;
4289
4290                 return ptr;
4291         }
4292
4293         return NULL;
4294 }
4295
4296 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4297 {
4298         void *v;
4299         loff_t l = 0;
4300
4301         preempt_disable();
4302         arch_spin_lock(&trace_cmdline_lock);
4303
4304         v = &savedcmd->map_cmdline_to_pid[0];
4305         while (l <= *pos) {
4306                 v = saved_cmdlines_next(m, v, &l);
4307                 if (!v)
4308                         return NULL;
4309         }
4310
4311         return v;
4312 }
4313
4314 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4315 {
4316         arch_spin_unlock(&trace_cmdline_lock);
4317         preempt_enable();
4318 }
4319
4320 static int saved_cmdlines_show(struct seq_file *m, void *v)
4321 {
4322         char buf[TASK_COMM_LEN];
4323         unsigned int *pid = v;
4324
4325         __trace_find_cmdline(*pid, buf);
4326         seq_printf(m, "%d %s\n", *pid, buf);
4327         return 0;
4328 }
4329
4330 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4331         .start          = saved_cmdlines_start,
4332         .next           = saved_cmdlines_next,
4333         .stop           = saved_cmdlines_stop,
4334         .show           = saved_cmdlines_show,
4335 };
4336
4337 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4338 {
4339         if (tracing_disabled)
4340                 return -ENODEV;
4341
4342         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4343 }
4344
4345 static const struct file_operations tracing_saved_cmdlines_fops = {
4346         .open           = tracing_saved_cmdlines_open,
4347         .read           = seq_read,
4348         .llseek         = seq_lseek,
4349         .release        = seq_release,
4350 };
4351
4352 static ssize_t
4353 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4354                                  size_t cnt, loff_t *ppos)
4355 {
4356         char buf[64];
4357         int r;
4358
4359         arch_spin_lock(&trace_cmdline_lock);
4360         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4361         arch_spin_unlock(&trace_cmdline_lock);
4362
4363         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4364 }
4365
4366 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4367 {
4368         kfree(s->saved_cmdlines);
4369         kfree(s->map_cmdline_to_pid);
4370         kfree(s);
4371 }
4372
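/*
 * Replace the saved_cmdlines buffer with a freshly allocated one holding
 * @val entries.  The pointer swap itself happens under trace_cmdline_lock;
 * the old buffer is freed only after the lock has been dropped.
 */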
4373 static int tracing_resize_saved_cmdlines(unsigned int val)
4374 {
4375         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4376
4377         s = kmalloc(sizeof(*s), GFP_KERNEL);
4378         if (!s)
4379                 return -ENOMEM;
4380
4381         if (allocate_cmdlines_buffer(val, s) < 0) {
4382                 kfree(s);
4383                 return -ENOMEM;
4384         }
4385
4386         arch_spin_lock(&trace_cmdline_lock);
4387         savedcmd_temp = savedcmd;
4388         savedcmd = s;
4389         arch_spin_unlock(&trace_cmdline_lock);
4390         free_saved_cmdlines_buffer(savedcmd_temp);
4391
4392         return 0;
4393 }
4394
4395 static ssize_t
4396 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4397                                   size_t cnt, loff_t *ppos)
4398 {
4399         unsigned long val;
4400         int ret;
4401
4402         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4403         if (ret)
4404                 return ret;
4405
4406         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4407         if (!val || val > PID_MAX_DEFAULT)
4408                 return -EINVAL;
4409
4410         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4411         if (ret < 0)
4412                 return ret;
4413
4414         *ppos += cnt;
4415
4416         return cnt;
4417 }
4418
4419 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4420         .open           = tracing_open_generic,
4421         .read           = tracing_saved_cmdlines_size_read,
4422         .write          = tracing_saved_cmdlines_size_write,
4423 };
4424
4425 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4426 static union trace_enum_map_item *
4427 update_enum_map(union trace_enum_map_item *ptr)
4428 {
4429         if (!ptr->map.enum_string) {
4430                 if (ptr->tail.next) {
4431                         ptr = ptr->tail.next;
4432                         /* Set ptr to the next real item (skip head) */
4433                         ptr++;
4434                 } else
4435                         return NULL;
4436         }
4437         return ptr;
4438 }
4439
4440 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4441 {
4442         union trace_enum_map_item *ptr = v;
4443
4444         /*
4445          * Paranoid! If ptr points to end, we don't want to increment past it.
4446          * This really should never happen.
4447          */
4448         ptr = update_enum_map(ptr);
4449         if (WARN_ON_ONCE(!ptr))
4450                 return NULL;
4451
4452         ptr++;
4453
4454         (*pos)++;
4455
4456         ptr = update_enum_map(ptr);
4457
4458         return ptr;
4459 }
4460
4461 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4462 {
4463         union trace_enum_map_item *v;
4464         loff_t l = 0;
4465
4466         mutex_lock(&trace_enum_mutex);
4467
4468         v = trace_enum_maps;
4469         if (v)
4470                 v++;
4471
4472         while (v && l < *pos) {
4473                 v = enum_map_next(m, v, &l);
4474         }
4475
4476         return v;
4477 }
4478
4479 static void enum_map_stop(struct seq_file *m, void *v)
4480 {
4481         mutex_unlock(&trace_enum_mutex);
4482 }
4483
4484 static int enum_map_show(struct seq_file *m, void *v)
4485 {
4486         union trace_enum_map_item *ptr = v;
4487
4488         seq_printf(m, "%s %ld (%s)\n",
4489                    ptr->map.enum_string, ptr->map.enum_value,
4490                    ptr->map.system);
4491
4492         return 0;
4493 }
4494
4495 static const struct seq_operations tracing_enum_map_seq_ops = {
4496         .start          = enum_map_start,
4497         .next           = enum_map_next,
4498         .stop           = enum_map_stop,
4499         .show           = enum_map_show,
4500 };
4501
4502 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4503 {
4504         if (tracing_disabled)
4505                 return -ENODEV;
4506
4507         return seq_open(filp, &tracing_enum_map_seq_ops);
4508 }
4509
4510 static const struct file_operations tracing_enum_map_fops = {
4511         .open           = tracing_enum_map_open,
4512         .read           = seq_read,
4513         .llseek         = seq_lseek,
4514         .release        = seq_release,
4515 };
4516
4517 static inline union trace_enum_map_item *
4518 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4519 {
4520         /* Return tail of array given the head */
4521         return ptr + ptr->head.length + 1;
4522 }
4523
4524 static void
4525 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4526                            int len)
4527 {
4528         struct trace_enum_map **stop;
4529         struct trace_enum_map **map;
4530         union trace_enum_map_item *map_array;
4531         union trace_enum_map_item *ptr;
4532
4533         stop = start + len;
4534
4535         /*
4536          * The trace_enum_maps contains the map plus a head and tail item,
4537          * where the head holds the module and length of array, and the
4538          * tail holds a pointer to the next list.
4539          */
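	/*
	 * Resulting layout of the allocation below, for len == N:
	 *
	 *   map_array[0]      head: { .mod = mod, .length = N }
	 *   map_array[1..N]   map:  copies of *start[0] .. *start[N-1]
	 *   map_array[N+1]    tail: zeroed; .next later chains in the
	 *                     array of the next module, if any
	 */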
4540         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4541         if (!map_array) {
4542                 pr_warn("Unable to allocate trace enum mapping\n");
4543                 return;
4544         }
4545
4546         mutex_lock(&trace_enum_mutex);
4547
4548         if (!trace_enum_maps)
4549                 trace_enum_maps = map_array;
4550         else {
4551                 ptr = trace_enum_maps;
4552                 for (;;) {
4553                         ptr = trace_enum_jmp_to_tail(ptr);
4554                         if (!ptr->tail.next)
4555                                 break;
4556                         ptr = ptr->tail.next;
4557
4558                 }
4559                 ptr->tail.next = map_array;
4560         }
4561         map_array->head.mod = mod;
4562         map_array->head.length = len;
4563         map_array++;
4564
4565         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4566                 map_array->map = **map;
4567                 map_array++;
4568         }
4569         memset(map_array, 0, sizeof(*map_array));
4570
4571         mutex_unlock(&trace_enum_mutex);
4572 }
4573
4574 static void trace_create_enum_file(struct dentry *d_tracer)
4575 {
4576         trace_create_file("enum_map", 0444, d_tracer,
4577                           NULL, &tracing_enum_map_fops);
4578 }
4579
4580 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4581 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4582 static inline void trace_insert_enum_map_file(struct module *mod,
4583                               struct trace_enum_map **start, int len) { }
4584 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4585
4586 static void trace_insert_enum_map(struct module *mod,
4587                                   struct trace_enum_map **start, int len)
4588 {
4589         struct trace_enum_map **map;
4590
4591         if (len <= 0)
4592                 return;
4593
4594         map = start;
4595
4596         trace_event_enum_update(map, len);
4597
4598         trace_insert_enum_map_file(mod, start, len);
4599 }
4600
4601 static ssize_t
4602 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4603                        size_t cnt, loff_t *ppos)
4604 {
4605         struct trace_array *tr = filp->private_data;
4606         char buf[MAX_TRACER_SIZE+2];
4607         int r;
4608
4609         mutex_lock(&trace_types_lock);
4610         r = sprintf(buf, "%s\n", tr->current_trace->name);
4611         mutex_unlock(&trace_types_lock);
4612
4613         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4614 }
4615
4616 int tracer_init(struct tracer *t, struct trace_array *tr)
4617 {
4618         tracing_reset_online_cpus(&tr->trace_buffer);
4619         return t->init(tr);
4620 }
4621
4622 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4623 {
4624         int cpu;
4625
4626         for_each_tracing_cpu(cpu)
4627                 per_cpu_ptr(buf->data, cpu)->entries = val;
4628 }
4629
4630 #ifdef CONFIG_TRACER_MAX_TRACE
4631 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4632 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4633                                         struct trace_buffer *size_buf, int cpu_id)
4634 {
4635         int cpu, ret = 0;
4636
4637         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4638                 for_each_tracing_cpu(cpu) {
4639                         ret = ring_buffer_resize(trace_buf->buffer,
4640                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4641                         if (ret < 0)
4642                                 break;
4643                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4644                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4645                 }
4646         } else {
4647                 ret = ring_buffer_resize(trace_buf->buffer,
4648                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4649                 if (ret == 0)
4650                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4651                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4652         }
4653
4654         return ret;
4655 }
4656 #endif /* CONFIG_TRACER_MAX_TRACE */
4657
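/*
 * Resize the ring buffer of @tr to @size for one CPU or for all of them.
 * When the current tracer uses the max/snapshot buffer on the global
 * array, that buffer is resized as well; if resizing it fails, the main
 * buffer is put back to its previous size, and if even that fails there
 * is no consistent state left, so tracing is disabled entirely.
 */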
4658 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4659                                         unsigned long size, int cpu)
4660 {
4661         int ret;
4662
4663         /*
4664          * If kernel or user changes the size of the ring buffer
4665          * we use the size that was given, and we can forget about
4666          * expanding it later.
4667          */
4668         ring_buffer_expanded = true;
4669
4670         /* May be called before buffers are initialized */
4671         if (!tr->trace_buffer.buffer)
4672                 return 0;
4673
4674         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4675         if (ret < 0)
4676                 return ret;
4677
4678 #ifdef CONFIG_TRACER_MAX_TRACE
4679         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4680             !tr->current_trace->use_max_tr)
4681                 goto out;
4682
4683         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4684         if (ret < 0) {
4685                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4686                                                      &tr->trace_buffer, cpu);
4687                 if (r < 0) {
4688                         /*
4689                          * AARGH! We are left with different
4690                          * size max buffer!!!!
4691                          * The max buffer is our "snapshot" buffer.
4692                          * When a tracer needs a snapshot (one of the
4693                          * latency tracers), it swaps the max buffer
4694                          * with the saved snap shot. We succeeded to
4695                          * with the saved snapshot. We succeeded in updating
4696                          * the size of the main buffer, but failed to
4697                          * to reset the main buffer to the original size, we
4698                          * failed there too. This is very unlikely to
4699                          * happen, but if it does, warn and kill all
4700                          * tracing.
4701                          */
4702                         WARN_ON(1);
4703                         tracing_disabled = 1;
4704                 }
4705                 return ret;
4706         }
4707
4708         if (cpu == RING_BUFFER_ALL_CPUS)
4709                 set_buffer_entries(&tr->max_buffer, size);
4710         else
4711                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4712
4713  out:
4714 #endif /* CONFIG_TRACER_MAX_TRACE */
4715
4716         if (cpu == RING_BUFFER_ALL_CPUS)
4717                 set_buffer_entries(&tr->trace_buffer, size);
4718         else
4719                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4720
4721         return ret;
4722 }
4723
4724 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4725                                           unsigned long size, int cpu_id)
4726 {
4727         int ret = size;
4728
4729         mutex_lock(&trace_types_lock);
4730
4731         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4732                 /* make sure this cpu is enabled in the mask */
4733                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4734                         ret = -EINVAL;
4735                         goto out;
4736                 }
4737         }
4738
4739         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4740         if (ret < 0)
4741                 ret = -ENOMEM;
4742
4743 out:
4744         mutex_unlock(&trace_types_lock);
4745
4746         return ret;
4747 }
4748
4749
4750 /**
4751  * tracing_update_buffers - used by tracing facility to expand ring buffers
4752  *
4753  * To save memory when tracing is configured in but never used, the
4754  * ring buffers are set to a minimum size. Once a user starts to use
4755  * the tracing facility, the buffers need to grow to their default
4756  * size.
4757  *
4758  * This function is to be called when a tracer is about to be used.
4759  */
4760 int tracing_update_buffers(void)
4761 {
4762         int ret = 0;
4763
4764         mutex_lock(&trace_types_lock);
4765         if (!ring_buffer_expanded)
4766                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4767                                                 RING_BUFFER_ALL_CPUS);
4768         mutex_unlock(&trace_types_lock);
4769
4770         return ret;
4771 }
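
/*
 * A minimal sketch of the intended calling pattern (hypothetical caller,
 * shown only for illustration); once this returns 0 the buffers are at
 * their full size and the tracer can be enabled:
 *
 *        ret = tracing_update_buffers();
 *        if (ret < 0)
 *                return ret;
 */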
4772
4773 struct trace_option_dentry;
4774
4775 static void
4776 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4777
4778 /*
4779  * Used to clear out the tracer before deletion of an instance.
4780  * Must have trace_types_lock held.
4781  */
4782 static void tracing_set_nop(struct trace_array *tr)
4783 {
4784         if (tr->current_trace == &nop_trace)
4785                 return;
4786
4787         tr->current_trace->enabled--;
4788
4789         if (tr->current_trace->reset)
4790                 tr->current_trace->reset(tr);
4791
4792         tr->current_trace = &nop_trace;
4793 }
4794
4795 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4796 {
4797         /* Only enable if the directory has been created already. */
4798         if (!tr->dir)
4799                 return;
4800
4801         create_trace_option_files(tr, t);
4802 }
4803
4804 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4805 {
4806         struct tracer *t;
4807 #ifdef CONFIG_TRACER_MAX_TRACE
4808         bool had_max_tr;
4809 #endif
4810         int ret = 0;
4811
4812         mutex_lock(&trace_types_lock);
4813
4814         if (!ring_buffer_expanded) {
4815                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4816                                                 RING_BUFFER_ALL_CPUS);
4817                 if (ret < 0)
4818                         goto out;
4819                 ret = 0;
4820         }
4821
4822         for (t = trace_types; t; t = t->next) {
4823                 if (strcmp(t->name, buf) == 0)
4824                         break;
4825         }
4826         if (!t) {
4827                 ret = -EINVAL;
4828                 goto out;
4829         }
4830         if (t == tr->current_trace)
4831                 goto out;
4832
4833         /* Some tracers are only allowed for the top level buffer */
4834         if (!trace_ok_for_array(t, tr)) {
4835                 ret = -EINVAL;
4836                 goto out;
4837         }
4838
4839         /* If trace pipe files are being read, we can't change the tracer */
4840         if (tr->current_trace->ref) {
4841                 ret = -EBUSY;
4842                 goto out;
4843         }
4844
4845         trace_branch_disable();
4846
4847         tr->current_trace->enabled--;
4848
4849         if (tr->current_trace->reset)
4850                 tr->current_trace->reset(tr);
4851
4852         /* Current trace needs to be nop_trace before synchronize_sched */
4853         tr->current_trace = &nop_trace;
4854
4855 #ifdef CONFIG_TRACER_MAX_TRACE
4856         had_max_tr = tr->allocated_snapshot;
4857
4858         if (had_max_tr && !t->use_max_tr) {
4859                 /*
4860                  * We need to make sure that the update_max_tr sees that
4861                  * current_trace changed to nop_trace to keep it from
4862                  * swapping the buffers after we resize it.
4863                  * The update_max_tr is called from interrupts disabled
4864                  * The update_max_tr is called with interrupts disabled
4865                  * so a synchronize_sched() is sufficient.
4866                 synchronize_sched();
4867                 free_snapshot(tr);
4868         }
4869 #endif
4870
4871 #ifdef CONFIG_TRACER_MAX_TRACE
4872         if (t->use_max_tr && !had_max_tr) {
4873                 ret = alloc_snapshot(tr);
4874                 if (ret < 0)
4875                         goto out;
4876         }
4877 #endif
4878
4879         if (t->init) {
4880                 ret = tracer_init(t, tr);
4881                 if (ret)
4882                         goto out;
4883         }
4884
4885         tr->current_trace = t;
4886         tr->current_trace->enabled++;
4887         trace_branch_enable(tr);
4888  out:
4889         mutex_unlock(&trace_types_lock);
4890
4891         return ret;
4892 }
4893
4894 static ssize_t
4895 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4896                         size_t cnt, loff_t *ppos)
4897 {
4898         struct trace_array *tr = filp->private_data;
4899         char buf[MAX_TRACER_SIZE+1];
4900         int i;
4901         size_t ret;
4902         int err;
4903
4904         ret = cnt;
4905
4906         if (cnt > MAX_TRACER_SIZE)
4907                 cnt = MAX_TRACER_SIZE;
4908
4909         if (copy_from_user(buf, ubuf, cnt))
4910                 return -EFAULT;
4911
4912         buf[cnt] = 0;
4913
4914         /* strip trailing whitespace. */
4915         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4916                 buf[i] = 0;
4917
4918         err = tracing_set_tracer(tr, buf);
4919         if (err)
4920                 return err;
4921
4922         *ppos += ret;
4923
4924         return ret;
4925 }
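
/*
 * For illustration, the write handler above backs the "current_tracer"
 * tracefs file. Assuming tracefs is mounted at /sys/kernel/tracing (or
 * /sys/kernel/debug/tracing), a typical interaction looks like:
 *
 *        # cat available_tracers
 *        # echo function > current_tracer
 *        # echo nop > current_tracer
 *
 * Trailing whitespace (such as the newline echo appends) is stripped
 * before the name is matched against the registered tracers.
 */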
4926
4927 static ssize_t
4928 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4929                    size_t cnt, loff_t *ppos)
4930 {
4931         char buf[64];
4932         int r;
4933
4934         r = snprintf(buf, sizeof(buf), "%ld\n",
4935                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4936         if (r > sizeof(buf))
4937                 r = sizeof(buf);
4938         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4939 }
4940
4941 static ssize_t
4942 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4943                     size_t cnt, loff_t *ppos)
4944 {
4945         unsigned long val;
4946         int ret;
4947
4948         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4949         if (ret)
4950                 return ret;
4951
4952         *ptr = val * 1000;
4953
4954         return cnt;
4955 }
4956
4957 static ssize_t
4958 tracing_thresh_read(struct file *filp, char __user *ubuf,
4959                     size_t cnt, loff_t *ppos)
4960 {
4961         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4962 }
4963
4964 static ssize_t
4965 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4966                      size_t cnt, loff_t *ppos)
4967 {
4968         struct trace_array *tr = filp->private_data;
4969         int ret;
4970
4971         mutex_lock(&trace_types_lock);
4972         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4973         if (ret < 0)
4974                 goto out;
4975
4976         if (tr->current_trace->update_thresh) {
4977                 ret = tr->current_trace->update_thresh(tr);
4978                 if (ret < 0)
4979                         goto out;
4980         }
4981
4982         ret = cnt;
4983 out:
4984         mutex_unlock(&trace_types_lock);
4985
4986         return ret;
4987 }
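
/*
 * Illustrative use of the "tracing_thresh" file handled above. The value
 * is written in microseconds (tracing_nsecs_write() scales it to
 * nanoseconds) and 0 disables the threshold:
 *
 *        # echo 100 > tracing_thresh
 *        # cat tracing_thresh
 *        100
 */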
4988
4989 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4990
4991 static ssize_t
4992 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4993                      size_t cnt, loff_t *ppos)
4994 {
4995         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4996 }
4997
4998 static ssize_t
4999 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5000                       size_t cnt, loff_t *ppos)
5001 {
5002         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5003 }
5004
5005 #endif
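
/*
 * These wrappers back the "tracing_max_latency" file used by the latency
 * tracers and the hardware latency tracer. As with tracing_thresh, the
 * value is presented in microseconds, so for example:
 *
 *        # echo 0 > tracing_max_latency       (reset the recorded maximum)
 *        # cat tracing_max_latency
 */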
5006
5007 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5008 {
5009         struct trace_array *tr = inode->i_private;
5010         struct trace_iterator *iter;
5011         int ret = 0;
5012
5013         if (tracing_disabled)
5014                 return -ENODEV;
5015
5016         if (trace_array_get(tr) < 0)
5017                 return -ENODEV;
5018
5019         mutex_lock(&trace_types_lock);
5020
5021         /* create a buffer to store the information to pass to userspace */
5022         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5023         if (!iter) {
5024                 ret = -ENOMEM;
5025                 __trace_array_put(tr);
5026                 goto out;
5027         }
5028
5029         trace_seq_init(&iter->seq);
5030         iter->trace = tr->current_trace;
5031
5032         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5033                 ret = -ENOMEM;
5034                 goto fail;
5035         }
5036
5037         /* trace pipe does not show start of buffer */
5038         cpumask_setall(iter->started);
5039
5040         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5041                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5042
5043         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5044         if (trace_clocks[tr->clock_id].in_ns)
5045                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5046
5047         iter->tr = tr;
5048         iter->trace_buffer = &tr->trace_buffer;
5049         iter->cpu_file = tracing_get_cpu(inode);
5050         mutex_init(&iter->mutex);
5051         filp->private_data = iter;
5052
5053         if (iter->trace->pipe_open)
5054                 iter->trace->pipe_open(iter);
5055
5056         nonseekable_open(inode, filp);
5057
5058         tr->current_trace->ref++;
5059 out:
5060         mutex_unlock(&trace_types_lock);
5061         return ret;
5062
5063 fail:
5064         kfree(iter);
5065         __trace_array_put(tr);
5066         mutex_unlock(&trace_types_lock);
5067         return ret;
5068 }
5069
5070 static int tracing_release_pipe(struct inode *inode, struct file *file)
5071 {
5072         struct trace_iterator *iter = file->private_data;
5073         struct trace_array *tr = inode->i_private;
5074
5075         mutex_lock(&trace_types_lock);
5076
5077         tr->current_trace->ref--;
5078
5079         if (iter->trace->pipe_close)
5080                 iter->trace->pipe_close(iter);
5081
5082         mutex_unlock(&trace_types_lock);
5083
5084         free_cpumask_var(iter->started);
5085         mutex_destroy(&iter->mutex);
5086         kfree(iter);
5087
5088         trace_array_put(tr);
5089
5090         return 0;
5091 }
5092
5093 static unsigned int
5094 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5095 {
5096         struct trace_array *tr = iter->tr;
5097
5098         /* Iterators are static, they should be filled or empty */
5099         if (trace_buffer_iter(iter, iter->cpu_file))
5100                 return POLLIN | POLLRDNORM;
5101
5102         if (tr->trace_flags & TRACE_ITER_BLOCK)
5103                 /*
5104                  * Always select as readable when in blocking mode
5105                  */
5106                 return POLLIN | POLLRDNORM;
5107         else
5108                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5109                                              filp, poll_table);
5110 }
5111
5112 static unsigned int
5113 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5114 {
5115         struct trace_iterator *iter = filp->private_data;
5116
5117         return trace_poll(iter, filp, poll_table);
5118 }
5119
5120 /* Must be called with iter->mutex held. */
5121 static int tracing_wait_pipe(struct file *filp)
5122 {
5123         struct trace_iterator *iter = filp->private_data;
5124         int ret;
5125
5126         while (trace_empty(iter)) {
5127
5128                 if ((filp->f_flags & O_NONBLOCK)) {
5129                         return -EAGAIN;
5130                 }
5131
5132                 /*
5133                  * We return EOF only after we have read something and
5134                  * tracing is disabled. If tracing is disabled but we have
5135                  * never read anything, we keep blocking. This lets a user
5136                  * cat this file, then enable tracing; after we have read
5137                  * something, we give an EOF when tracing is disabled again.
5138                  *
5139                  * iter->pos will be 0 if we haven't read anything.
5140                  */
5141                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5142                         break;
5143
5144                 mutex_unlock(&iter->mutex);
5145
5146                 ret = wait_on_pipe(iter, false);
5147
5148                 mutex_lock(&iter->mutex);
5149
5150                 if (ret)
5151                         return ret;
5152         }
5153
5154         return 1;
5155 }
5156
5157 /*
5158  * Consumer reader.
5159  */
5160 static ssize_t
5161 tracing_read_pipe(struct file *filp, char __user *ubuf,
5162                   size_t cnt, loff_t *ppos)
5163 {
5164         struct trace_iterator *iter = filp->private_data;
5165         ssize_t sret;
5166
5167         /*
5168          * Avoid more than one consumer on a single file descriptor.
5169          * This is just a matter of trace coherency; the ring buffer itself
5170          * is protected.
5171          */
5172         mutex_lock(&iter->mutex);
5173
5174         /* return any leftover data */
5175         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5176         if (sret != -EBUSY)
5177                 goto out;
5178
5179         trace_seq_init(&iter->seq);
5180
5181         if (iter->trace->read) {
5182                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5183                 if (sret)
5184                         goto out;
5185         }
5186
5187 waitagain:
5188         sret = tracing_wait_pipe(filp);
5189         if (sret <= 0)
5190                 goto out;
5191
5192         /* stop when tracing is finished */
5193         if (trace_empty(iter)) {
5194                 sret = 0;
5195                 goto out;
5196         }
5197
5198         if (cnt >= PAGE_SIZE)
5199                 cnt = PAGE_SIZE - 1;
5200
5201         /* reset all but tr, trace, and overruns */
5202         memset(&iter->seq, 0,
5203                sizeof(struct trace_iterator) -
5204                offsetof(struct trace_iterator, seq));
5205         cpumask_clear(iter->started);
5206         trace_seq_init(&iter->seq);
5207         iter->pos = -1;
5208
5209         trace_event_read_lock();
5210         trace_access_lock(iter->cpu_file);
5211         while (trace_find_next_entry_inc(iter) != NULL) {
5212                 enum print_line_t ret;
5213                 int save_len = iter->seq.seq.len;
5214
5215                 ret = print_trace_line(iter);
5216                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5217                         /* don't print partial lines */
5218                         iter->seq.seq.len = save_len;
5219                         break;
5220                 }
5221                 if (ret != TRACE_TYPE_NO_CONSUME)
5222                         trace_consume(iter);
5223
5224                 if (trace_seq_used(&iter->seq) >= cnt)
5225                         break;
5226
5227                 /*
5228                  * Setting the full flag means we reached the trace_seq buffer
5229                  * size and should have left via the partial-output condition
5230                  * above. One of the trace_seq_* functions is not used properly.
5231                  */
5232                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5233                           iter->ent->type);
5234         }
5235         trace_access_unlock(iter->cpu_file);
5236         trace_event_read_unlock();
5237
5238         /* Now copy what we have to the user */
5239         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5240         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5241                 trace_seq_init(&iter->seq);
5242
5243         /*
5244          * If there was nothing to send to user, in spite of consuming trace
5245          * entries, go back to wait for more entries.
5246          */
5247         if (sret == -EBUSY)
5248                 goto waitagain;
5249
5250 out:
5251         mutex_unlock(&iter->mutex);
5252
5253         return sret;
5254 }
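
/*
 * The consumer read above backs the "trace_pipe" file: reads are
 * destructive (each entry is consumed as it is printed) and, unlike the
 * "trace" file, an empty buffer blocks the reader until new entries
 * arrive, unless the file was opened with O_NONBLOCK. A sketch of
 * typical use:
 *
 *        # cat trace_pipe > /tmp/trace.txt &
 *        # echo 1 > tracing_on
 *        ... run the workload ...
 *        # echo 0 > tracing_on
 */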
5255
5256 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5257                                      unsigned int idx)
5258 {
5259         __free_page(spd->pages[idx]);
5260 }
5261
5262 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5263         .can_merge              = 0,
5264         .confirm                = generic_pipe_buf_confirm,
5265         .release                = generic_pipe_buf_release,
5266         .steal                  = generic_pipe_buf_steal,
5267         .get                    = generic_pipe_buf_get,
5268 };
5269
5270 static size_t
5271 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5272 {
5273         size_t count;
5274         int save_len;
5275         int ret;
5276
5277         /* Seq buffer is page-sized, exactly what we need. */
5278         for (;;) {
5279                 save_len = iter->seq.seq.len;
5280                 ret = print_trace_line(iter);
5281
5282                 if (trace_seq_has_overflowed(&iter->seq)) {
5283                         iter->seq.seq.len = save_len;
5284                         break;
5285                 }
5286
5287                 /*
5288                  * This should not be hit, because PARTIAL_LINE should only
5289                  * be returned if the iter->seq overflowed. But check it
5290                  * anyway to be safe.
5291                  */
5292                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5293                         iter->seq.seq.len = save_len;
5294                         break;
5295                 }
5296
5297                 count = trace_seq_used(&iter->seq) - save_len;
5298                 if (rem < count) {
5299                         rem = 0;
5300                         iter->seq.seq.len = save_len;
5301                         break;
5302                 }
5303
5304                 if (ret != TRACE_TYPE_NO_CONSUME)
5305                         trace_consume(iter);
5306                 rem -= count;
5307                 if (!trace_find_next_entry_inc(iter))   {
5308                         rem = 0;
5309                         iter->ent = NULL;
5310                         break;
5311                 }
5312         }
5313
5314         return rem;
5315 }
5316
5317 static ssize_t tracing_splice_read_pipe(struct file *filp,
5318                                         loff_t *ppos,
5319                                         struct pipe_inode_info *pipe,
5320                                         size_t len,
5321                                         unsigned int flags)
5322 {
5323         struct page *pages_def[PIPE_DEF_BUFFERS];
5324         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5325         struct trace_iterator *iter = filp->private_data;
5326         struct splice_pipe_desc spd = {
5327                 .pages          = pages_def,
5328                 .partial        = partial_def,
5329                 .nr_pages       = 0, /* This gets updated below. */
5330                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5331                 .flags          = flags,
5332                 .ops            = &tracing_pipe_buf_ops,
5333                 .spd_release    = tracing_spd_release_pipe,
5334         };
5335         ssize_t ret;
5336         size_t rem;
5337         unsigned int i;
5338
5339         if (splice_grow_spd(pipe, &spd))
5340                 return -ENOMEM;
5341
5342         mutex_lock(&iter->mutex);
5343
5344         if (iter->trace->splice_read) {
5345                 ret = iter->trace->splice_read(iter, filp,
5346                                                ppos, pipe, len, flags);
5347                 if (ret)
5348                         goto out_err;
5349         }
5350
5351         ret = tracing_wait_pipe(filp);
5352         if (ret <= 0)
5353                 goto out_err;
5354
5355         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5356                 ret = -EFAULT;
5357                 goto out_err;
5358         }
5359
5360         trace_event_read_lock();
5361         trace_access_lock(iter->cpu_file);
5362
5363         /* Fill as many pages as possible. */
5364         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5365                 spd.pages[i] = alloc_page(GFP_KERNEL);
5366                 if (!spd.pages[i])
5367                         break;
5368
5369                 rem = tracing_fill_pipe_page(rem, iter);
5370
5371                 /* Copy the data into the page, so we can start over. */
5372                 ret = trace_seq_to_buffer(&iter->seq,
5373                                           page_address(spd.pages[i]),
5374                                           trace_seq_used(&iter->seq));
5375                 if (ret < 0) {
5376                         __free_page(spd.pages[i]);
5377                         break;
5378                 }
5379                 spd.partial[i].offset = 0;
5380                 spd.partial[i].len = trace_seq_used(&iter->seq);
5381
5382                 trace_seq_init(&iter->seq);
5383         }
5384
5385         trace_access_unlock(iter->cpu_file);
5386         trace_event_read_unlock();
5387         mutex_unlock(&iter->mutex);
5388
5389         spd.nr_pages = i;
5390
5391         if (i)
5392                 ret = splice_to_pipe(pipe, &spd);
5393         else
5394                 ret = 0;
5395 out:
5396         splice_shrink_spd(&spd);
5397         return ret;
5398
5399 out_err:
5400         mutex_unlock(&iter->mutex);
5401         goto out;
5402 }
5403
5404 static ssize_t
5405 tracing_entries_read(struct file *filp, char __user *ubuf,
5406                      size_t cnt, loff_t *ppos)
5407 {
5408         struct inode *inode = file_inode(filp);
5409         struct trace_array *tr = inode->i_private;
5410         int cpu = tracing_get_cpu(inode);
5411         char buf[64];
5412         int r = 0;
5413         ssize_t ret;
5414
5415         mutex_lock(&trace_types_lock);
5416
5417         if (cpu == RING_BUFFER_ALL_CPUS) {
5418                 int cpu, buf_size_same;
5419                 unsigned long size;
5420
5421                 size = 0;
5422                 buf_size_same = 1;
5423                 /* check if all cpu sizes are the same */
5424                 for_each_tracing_cpu(cpu) {
5425                         /* fill in the size from the first enabled cpu */
5426                         if (size == 0)
5427                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5428                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5429                                 buf_size_same = 0;
5430                                 break;
5431                         }
5432                 }
5433
5434                 if (buf_size_same) {
5435                         if (!ring_buffer_expanded)
5436                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5437                                             size >> 10,
5438                                             trace_buf_size >> 10);
5439                         else
5440                                 r = sprintf(buf, "%lu\n", size >> 10);
5441                 } else
5442                         r = sprintf(buf, "X\n");
5443         } else
5444                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5445
5446         mutex_unlock(&trace_types_lock);
5447
5448         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5449         return ret;
5450 }
5451
5452 static ssize_t
5453 tracing_entries_write(struct file *filp, const char __user *ubuf,
5454                       size_t cnt, loff_t *ppos)
5455 {
5456         struct inode *inode = file_inode(filp);
5457         struct trace_array *tr = inode->i_private;
5458         unsigned long val;
5459         int ret;
5460
5461         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5462         if (ret)
5463                 return ret;
5464
5465         /* must have at least 1 entry */
5466         if (!val)
5467                 return -EINVAL;
5468
5469         /* value is in KB */
5470         val <<= 10;
5471         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5472         if (ret < 0)
5473                 return ret;
5474
5475         *ppos += cnt;
5476
5477         return cnt;
5478 }
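
/*
 * The read/write pair above implements "buffer_size_kb" (and the
 * per_cpu/cpuN/buffer_size_kb files); values are in kilobytes per CPU.
 * For example:
 *
 *        # echo 4096 > buffer_size_kb             (resize every CPU buffer)
 *        # echo 1024 > per_cpu/cpu0/buffer_size_kb
 *        # cat buffer_size_kb
 *        4096
 *
 * A read of "X" means the per-CPU sizes currently differ.
 */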
5479
5480 static ssize_t
5481 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5482                                 size_t cnt, loff_t *ppos)
5483 {
5484         struct trace_array *tr = filp->private_data;
5485         char buf[64];
5486         int r, cpu;
5487         unsigned long size = 0, expanded_size = 0;
5488
5489         mutex_lock(&trace_types_lock);
5490         for_each_tracing_cpu(cpu) {
5491                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5492                 if (!ring_buffer_expanded)
5493                         expanded_size += trace_buf_size >> 10;
5494         }
5495         if (ring_buffer_expanded)
5496                 r = sprintf(buf, "%lu\n", size);
5497         else
5498                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5499         mutex_unlock(&trace_types_lock);
5500
5501         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5502 }
5503
5504 static ssize_t
5505 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5506                           size_t cnt, loff_t *ppos)
5507 {
5508         /*
5509          * There is no need to read what the user has written; this function
5510          * just makes sure that there is no error when "echo" is used.
5511          */
5512
5513         *ppos += cnt;
5514
5515         return cnt;
5516 }
5517
5518 static int
5519 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5520 {
5521         struct trace_array *tr = inode->i_private;
5522
5523         /* disable tracing? */
5524         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5525                 tracer_tracing_off(tr);
5526         /* resize the ring buffer to 0 */
5527         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5528
5529         trace_array_put(tr);
5530
5531         return 0;
5532 }
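
/*
 * These two handlers back the "free_buffer" file: writes are accepted but
 * ignored, and closing the file resizes the ring buffer down to its
 * minimum size (and, if the "disable_on_free" trace option is set, also
 * turns tracing off). For example:
 *
 *        # echo anything > free_buffer        (memory is freed on close)
 */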
5533
5534 static ssize_t
5535 tracing_mark_write(struct file *filp, const char __user *ubuf,
5536                                         size_t cnt, loff_t *fpos)
5537 {
5538         unsigned long addr = (unsigned long)ubuf;
5539         struct trace_array *tr = filp->private_data;
5540         struct ring_buffer_event *event;
5541         struct ring_buffer *buffer;
5542         struct print_entry *entry;
5543         unsigned long irq_flags;
5544         struct page *pages[2];
5545         void *map_page[2];
5546         int nr_pages = 1;
5547         ssize_t written;
5548         int offset;
5549         int size;
5550         int len;
5551         int ret;
5552         int i;
5553
5554         if (tracing_disabled)
5555                 return -EINVAL;
5556
5557         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5558                 return -EINVAL;
5559
5560         if (cnt > TRACE_BUF_SIZE)
5561                 cnt = TRACE_BUF_SIZE;
5562
5563         /*
5564          * Userspace is injecting traces into the kernel trace buffer.
5565          * We want to be as non-intrusive as possible.
5566          * To do so, we do not want to allocate any special buffers
5567          * or take any locks, but instead write the userspace data
5568          * straight into the ring buffer.
5569          *
5570          * First we need to pin the userspace buffer into memory.
5571          * Most likely it already is, because the caller just referenced
5572          * it, but there is no guarantee. By using get_user_pages_fast()
5573          * and kmap_atomic()/kunmap_atomic() we can get access to the
5574          * pages directly. We then write the data directly into the
5575          * ring buffer.
5576          */
5577         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5578
5579         /* check if the write crosses a page boundary */
5580         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5581                 nr_pages = 2;
5582
5583         offset = addr & (PAGE_SIZE - 1);
5584         addr &= PAGE_MASK;
5585
5586         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5587         if (ret < nr_pages) {
5588                 while (--ret >= 0)
5589                         put_page(pages[ret]);
5590                 written = -EFAULT;
5591                 goto out;
5592         }
5593
5594         for (i = 0; i < nr_pages; i++)
5595                 map_page[i] = kmap_atomic(pages[i]);
5596
5597         local_save_flags(irq_flags);
5598         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5599         buffer = tr->trace_buffer.buffer;
5600         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5601                                           irq_flags, preempt_count());
5602         if (!event) {
5603                 /* Ring buffer disabled, return as if not open for write */
5604                 written = -EBADF;
5605                 goto out_unlock;
5606         }
5607
5608         entry = ring_buffer_event_data(event);
5609         entry->ip = _THIS_IP_;
5610
5611         if (nr_pages == 2) {
5612                 len = PAGE_SIZE - offset;
5613                 memcpy(&entry->buf, map_page[0] + offset, len);
5614                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5615         } else
5616                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5617
5618         if (entry->buf[cnt - 1] != '\n') {
5619                 entry->buf[cnt] = '\n';
5620                 entry->buf[cnt + 1] = '\0';
5621         } else
5622                 entry->buf[cnt] = '\0';
5623
5624         __buffer_unlock_commit(buffer, event);
5625
5626         written = cnt;
5627
5628         *fpos += written;
5629
5630  out_unlock:
5631         for (i = nr_pages - 1; i >= 0; i--) {
5632                 kunmap_atomic(map_page[i]);
5633                 put_page(pages[i]);
5634         }
5635  out:
5636         return written;
5637 }
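
/*
 * tracing_mark_write() above implements the "trace_marker" file, which
 * lets user space annotate the trace. A write is limited to a bit less
 * than one page and a newline is appended if missing. For example:
 *
 *        # echo "about to start the benchmark" > trace_marker
 *
 * The string shows up in the trace output as a print event at the point
 * in time it was written.
 */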
5638
5639 static int tracing_clock_show(struct seq_file *m, void *v)
5640 {
5641         struct trace_array *tr = m->private;
5642         int i;
5643
5644         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5645                 seq_printf(m,
5646                         "%s%s%s%s", i ? " " : "",
5647                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5648                         i == tr->clock_id ? "]" : "");
5649         seq_putc(m, '\n');
5650
5651         return 0;
5652 }
5653
5654 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5655 {
5656         int i;
5657
5658         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5659                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5660                         break;
5661         }
5662         if (i == ARRAY_SIZE(trace_clocks))
5663                 return -EINVAL;
5664
5665         mutex_lock(&trace_types_lock);
5666
5667         tr->clock_id = i;
5668
5669         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5670
5671         /*
5672          * New clock may not be consistent with the previous clock.
5673          * Reset the buffer so that it doesn't have incomparable timestamps.
5674          */
5675         tracing_reset_online_cpus(&tr->trace_buffer);
5676
5677 #ifdef CONFIG_TRACER_MAX_TRACE
5678         if (tr->max_buffer.buffer)
5679                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5680         tracing_reset_online_cpus(&tr->max_buffer);
5681 #endif
5682
5683         mutex_unlock(&trace_types_lock);
5684
5685         return 0;
5686 }
5687
5688 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5689                                    size_t cnt, loff_t *fpos)
5690 {
5691         struct seq_file *m = filp->private_data;
5692         struct trace_array *tr = m->private;
5693         char buf[64];
5694         const char *clockstr;
5695         int ret;
5696
5697         if (cnt >= sizeof(buf))
5698                 return -EINVAL;
5699
5700         if (copy_from_user(buf, ubuf, cnt))
5701                 return -EFAULT;
5702
5703         buf[cnt] = 0;
5704
5705         clockstr = strstrip(buf);
5706
5707         ret = tracing_set_clock(tr, clockstr);
5708         if (ret)
5709                 return ret;
5710
5711         *fpos += cnt;
5712
5713         return cnt;
5714 }
5715
5716 static int tracing_clock_open(struct inode *inode, struct file *file)
5717 {
5718         struct trace_array *tr = inode->i_private;
5719         int ret;
5720
5721         if (tracing_disabled)
5722                 return -ENODEV;
5723
5724         if (trace_array_get(tr))
5725                 return -ENODEV;
5726
5727         ret = single_open(file, tracing_clock_show, inode->i_private);
5728         if (ret < 0)
5729                 trace_array_put(tr);
5730
5731         return ret;
5732 }
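
/*
 * The handlers above implement the "trace_clock" file. Reading it lists
 * the available clocks with the current one in brackets; writing a name
 * selects it. As tracing_set_clock() shows, switching clocks resets the
 * buffers because old and new timestamps are not comparable:
 *
 *        # cat trace_clock
 *        [local] global counter uptime perf ...
 *        # echo global > trace_clock
 */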
5733
5734 struct ftrace_buffer_info {
5735         struct trace_iterator   iter;
5736         void                    *spare;
5737         unsigned int            read;
5738 };
5739
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5742 {
5743         struct trace_array *tr = inode->i_private;
5744         struct trace_iterator *iter;
5745         struct seq_file *m;
5746         int ret = 0;
5747
5748         if (trace_array_get(tr) < 0)
5749                 return -ENODEV;
5750
5751         if (file->f_mode & FMODE_READ) {
5752                 iter = __tracing_open(inode, file, true);
5753                 if (IS_ERR(iter))
5754                         ret = PTR_ERR(iter);
5755         } else {
5756                 /* Writes still need the seq_file to hold the private data */
5757                 ret = -ENOMEM;
5758                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5759                 if (!m)
5760                         goto out;
5761                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5762                 if (!iter) {
5763                         kfree(m);
5764                         goto out;
5765                 }
5766                 ret = 0;
5767
5768                 iter->tr = tr;
5769                 iter->trace_buffer = &tr->max_buffer;
5770                 iter->cpu_file = tracing_get_cpu(inode);
5771                 m->private = iter;
5772                 file->private_data = m;
5773         }
5774 out:
5775         if (ret < 0)
5776                 trace_array_put(tr);
5777
5778         return ret;
5779 }
5780
5781 static ssize_t
5782 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5783                        loff_t *ppos)
5784 {
5785         struct seq_file *m = filp->private_data;
5786         struct trace_iterator *iter = m->private;
5787         struct trace_array *tr = iter->tr;
5788         unsigned long val;
5789         int ret;
5790
5791         ret = tracing_update_buffers();
5792         if (ret < 0)
5793                 return ret;
5794
5795         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5796         if (ret)
5797                 return ret;
5798
5799         mutex_lock(&trace_types_lock);
5800
5801         if (tr->current_trace->use_max_tr) {
5802                 ret = -EBUSY;
5803                 goto out;
5804         }
5805
5806         switch (val) {
5807         case 0:
5808                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5809                         ret = -EINVAL;
5810                         break;
5811                 }
5812                 if (tr->allocated_snapshot)
5813                         free_snapshot(tr);
5814                 break;
5815         case 1:
5816 /* Only allow per-cpu swap if the ring buffer supports it */
5817 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5818                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5819                         ret = -EINVAL;
5820                         break;
5821                 }
5822 #endif
5823                 if (!tr->allocated_snapshot)
5824                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5825                                 &tr->trace_buffer, iter->cpu_file);
5826                 else
5827                         ret = alloc_snapshot(tr);
5828
5829                 if (ret < 0)
5830                         break;
5831
5832                 local_irq_disable();
5833                 /* Now, we're going to swap */
5834                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5835                         update_max_tr(tr, current, smp_processor_id());
5836                 else
5837                         update_max_tr_single(tr, current, iter->cpu_file);
5838                 local_irq_enable();
5839                 break;
5840         default:
5841                 if (tr->allocated_snapshot) {
5842                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5843                                 tracing_reset_online_cpus(&tr->max_buffer);
5844                         else
5845                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5846                 }
5847                 break;
5848         }
5849
5850         if (ret >= 0) {
5851                 *ppos += cnt;
5852                 ret = cnt;
5853         }
5854 out:
5855         mutex_unlock(&trace_types_lock);
5856         return ret;
5857 }
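
/*
 * The switch in tracing_snapshot_write() above gives the "snapshot" file
 * its semantics:
 *
 *        # echo 0 > snapshot    (free the snapshot buffer; top-level file only)
 *        # echo 1 > snapshot    (allocate if needed and take a snapshot)
 *        # echo 2 > snapshot    (any other value: clear the snapshot contents
 *                                without freeing or allocating it)
 *        # cat snapshot         (read the snapshot like the "trace" file)
 *
 * Writes fail with -EBUSY while the current tracer itself uses the max
 * buffer.
 */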
5858
5859 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5860 {
5861         struct seq_file *m = file->private_data;
5862         int ret;
5863
5864         ret = tracing_release(inode, file);
5865
5866         if (file->f_mode & FMODE_READ)
5867                 return ret;
5868
5869         /* If write only, the seq_file is just a stub */
5870         if (m)
5871                 kfree(m->private);
5872         kfree(m);
5873
5874         return 0;
5875 }
5876
5877 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5878 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5879                                     size_t count, loff_t *ppos);
5880 static int tracing_buffers_release(struct inode *inode, struct file *file);
5881 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5882                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5883
5884 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5885 {
5886         struct ftrace_buffer_info *info;
5887         int ret;
5888
5889         ret = tracing_buffers_open(inode, filp);
5890         if (ret < 0)
5891                 return ret;
5892
5893         info = filp->private_data;
5894
5895         if (info->iter.trace->use_max_tr) {
5896                 tracing_buffers_release(inode, filp);
5897                 return -EBUSY;
5898         }
5899
5900         info->iter.snapshot = true;
5901         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5902
5903         return ret;
5904 }
5905
5906 #endif /* CONFIG_TRACER_SNAPSHOT */
5907
5908
5909 static const struct file_operations tracing_thresh_fops = {
5910         .open           = tracing_open_generic,
5911         .read           = tracing_thresh_read,
5912         .write          = tracing_thresh_write,
5913         .llseek         = generic_file_llseek,
5914 };
5915
5916 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5917 static const struct file_operations tracing_max_lat_fops = {
5918         .open           = tracing_open_generic,
5919         .read           = tracing_max_lat_read,
5920         .write          = tracing_max_lat_write,
5921         .llseek         = generic_file_llseek,
5922 };
5923 #endif
5924
5925 static const struct file_operations set_tracer_fops = {
5926         .open           = tracing_open_generic,
5927         .read           = tracing_set_trace_read,
5928         .write          = tracing_set_trace_write,
5929         .llseek         = generic_file_llseek,
5930 };
5931
5932 static const struct file_operations tracing_pipe_fops = {
5933         .open           = tracing_open_pipe,
5934         .poll           = tracing_poll_pipe,
5935         .read           = tracing_read_pipe,
5936         .splice_read    = tracing_splice_read_pipe,
5937         .release        = tracing_release_pipe,
5938         .llseek         = no_llseek,
5939 };
5940
5941 static const struct file_operations tracing_entries_fops = {
5942         .open           = tracing_open_generic_tr,
5943         .read           = tracing_entries_read,
5944         .write          = tracing_entries_write,
5945         .llseek         = generic_file_llseek,
5946         .release        = tracing_release_generic_tr,
5947 };
5948
5949 static const struct file_operations tracing_total_entries_fops = {
5950         .open           = tracing_open_generic_tr,
5951         .read           = tracing_total_entries_read,
5952         .llseek         = generic_file_llseek,
5953         .release        = tracing_release_generic_tr,
5954 };
5955
5956 static const struct file_operations tracing_free_buffer_fops = {
5957         .open           = tracing_open_generic_tr,
5958         .write          = tracing_free_buffer_write,
5959         .release        = tracing_free_buffer_release,
5960 };
5961
5962 static const struct file_operations tracing_mark_fops = {
5963         .open           = tracing_open_generic_tr,
5964         .write          = tracing_mark_write,
5965         .llseek         = generic_file_llseek,
5966         .release        = tracing_release_generic_tr,
5967 };
5968
5969 static const struct file_operations trace_clock_fops = {
5970         .open           = tracing_clock_open,
5971         .read           = seq_read,
5972         .llseek         = seq_lseek,
5973         .release        = tracing_single_release_tr,
5974         .write          = tracing_clock_write,
5975 };
5976
5977 #ifdef CONFIG_TRACER_SNAPSHOT
5978 static const struct file_operations snapshot_fops = {
5979         .open           = tracing_snapshot_open,
5980         .read           = seq_read,
5981         .write          = tracing_snapshot_write,
5982         .llseek         = tracing_lseek,
5983         .release        = tracing_snapshot_release,
5984 };
5985
5986 static const struct file_operations snapshot_raw_fops = {
5987         .open           = snapshot_raw_open,
5988         .read           = tracing_buffers_read,
5989         .release        = tracing_buffers_release,
5990         .splice_read    = tracing_buffers_splice_read,
5991         .llseek         = no_llseek,
5992 };
5993
5994 #endif /* CONFIG_TRACER_SNAPSHOT */
5995
5996 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5997 {
5998         struct trace_array *tr = inode->i_private;
5999         struct ftrace_buffer_info *info;
6000         int ret;
6001
6002         if (tracing_disabled)
6003                 return -ENODEV;
6004
6005         if (trace_array_get(tr) < 0)
6006                 return -ENODEV;
6007
6008         info = kzalloc(sizeof(*info), GFP_KERNEL);
6009         if (!info) {
6010                 trace_array_put(tr);
6011                 return -ENOMEM;
6012         }
6013
6014         mutex_lock(&trace_types_lock);
6015
6016         info->iter.tr           = tr;
6017         info->iter.cpu_file     = tracing_get_cpu(inode);
6018         info->iter.trace        = tr->current_trace;
6019         info->iter.trace_buffer = &tr->trace_buffer;
6020         info->spare             = NULL;
6021         /* Force reading ring buffer for first read */
6022         info->read              = (unsigned int)-1;
6023
6024         filp->private_data = info;
6025
6026         tr->current_trace->ref++;
6027
6028         mutex_unlock(&trace_types_lock);
6029
6030         ret = nonseekable_open(inode, filp);
6031         if (ret < 0)
6032                 trace_array_put(tr);
6033
6034         return ret;
6035 }
6036
6037 static unsigned int
6038 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6039 {
6040         struct ftrace_buffer_info *info = filp->private_data;
6041         struct trace_iterator *iter = &info->iter;
6042
6043         return trace_poll(iter, filp, poll_table);
6044 }
6045
6046 static ssize_t
6047 tracing_buffers_read(struct file *filp, char __user *ubuf,
6048                      size_t count, loff_t *ppos)
6049 {
6050         struct ftrace_buffer_info *info = filp->private_data;
6051         struct trace_iterator *iter = &info->iter;
6052         ssize_t ret;
6053         ssize_t size;
6054
6055         if (!count)
6056                 return 0;
6057
6058 #ifdef CONFIG_TRACER_MAX_TRACE
6059         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6060                 return -EBUSY;
6061 #endif
6062
6063         if (!info->spare)
6064                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6065                                                           iter->cpu_file);
6066         if (!info->spare)
6067                 return -ENOMEM;
6068
6069         /* Do we have previous read data to read? */
6070         if (info->read < PAGE_SIZE)
6071                 goto read;
6072
6073  again:
6074         trace_access_lock(iter->cpu_file);
6075         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6076                                     &info->spare,
6077                                     count,
6078                                     iter->cpu_file, 0);
6079         trace_access_unlock(iter->cpu_file);
6080
6081         if (ret < 0) {
6082                 if (trace_empty(iter)) {
6083                         if ((filp->f_flags & O_NONBLOCK))
6084                                 return -EAGAIN;
6085
6086                         ret = wait_on_pipe(iter, false);
6087                         if (ret)
6088                                 return ret;
6089
6090                         goto again;
6091                 }
6092                 return 0;
6093         }
6094
6095         info->read = 0;
6096  read:
6097         size = PAGE_SIZE - info->read;
6098         if (size > count)
6099                 size = count;
6100
6101         ret = copy_to_user(ubuf, info->spare + info->read, size);
6102         if (ret == size)
6103                 return -EFAULT;
6104
6105         size -= ret;
6106
6107         *ppos += size;
6108         info->read += size;
6109
6110         return size;
6111 }
6112
6113 static int tracing_buffers_release(struct inode *inode, struct file *file)
6114 {
6115         struct ftrace_buffer_info *info = file->private_data;
6116         struct trace_iterator *iter = &info->iter;
6117
6118         mutex_lock(&trace_types_lock);
6119
6120         iter->tr->current_trace->ref--;
6121
6122         __trace_array_put(iter->tr);
6123
6124         if (info->spare)
6125                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6126         kfree(info);
6127
6128         mutex_unlock(&trace_types_lock);
6129
6130         return 0;
6131 }
6132
6133 struct buffer_ref {
6134         struct ring_buffer      *buffer;
6135         void                    *page;
6136         int                     ref;
6137 };
6138
6139 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6140                                     struct pipe_buffer *buf)
6141 {
6142         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6143
6144         if (--ref->ref)
6145                 return;
6146
6147         ring_buffer_free_read_page(ref->buffer, ref->page);
6148         kfree(ref);
6149         buf->private = 0;
6150 }
6151
6152 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6153                                 struct pipe_buffer *buf)
6154 {
6155         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6156
6157         if (ref->ref > INT_MAX/2)
6158                 return false;
6159
6160         ref->ref++;
6161         return true;
6162 }
6163
6164 /* Pipe buffer operations for a ring buffer page. */
6165 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6166         .can_merge              = 0,
6167         .confirm                = generic_pipe_buf_confirm,
6168         .release                = buffer_pipe_buf_release,
6169         .steal                  = generic_pipe_buf_steal,
6170         .get                    = buffer_pipe_buf_get,
6171 };
6172
6173 /*
6174  * Callback from splice_to_pipe(), if we need to release some pages
6175  * at the end of the spd in case we errored out in filling the pipe.
6176  */
6177 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6178 {
6179         struct buffer_ref *ref =
6180                 (struct buffer_ref *)spd->partial[i].private;
6181
6182         if (--ref->ref)
6183                 return;
6184
6185         ring_buffer_free_read_page(ref->buffer, ref->page);
6186         kfree(ref);
6187         spd->partial[i].private = 0;
6188 }
6189
6190 static ssize_t
6191 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6192                             struct pipe_inode_info *pipe, size_t len,
6193                             unsigned int flags)
6194 {
6195         struct ftrace_buffer_info *info = file->private_data;
6196         struct trace_iterator *iter = &info->iter;
6197         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6198         struct page *pages_def[PIPE_DEF_BUFFERS];
6199         struct splice_pipe_desc spd = {
6200                 .pages          = pages_def,
6201                 .partial        = partial_def,
6202                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6203                 .flags          = flags,
6204                 .ops            = &buffer_pipe_buf_ops,
6205                 .spd_release    = buffer_spd_release,
6206         };
6207         struct buffer_ref *ref;
6208         int entries, i;
6209         ssize_t ret = 0;
6210
6211 #ifdef CONFIG_TRACER_MAX_TRACE
6212         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6213                 return -EBUSY;
6214 #endif
6215
6216         if (*ppos & (PAGE_SIZE - 1))
6217                 return -EINVAL;
6218
6219         if (len & (PAGE_SIZE - 1)) {
6220                 if (len < PAGE_SIZE)
6221                         return -EINVAL;
6222                 len &= PAGE_MASK;
6223         }
6224
6225         if (splice_grow_spd(pipe, &spd))
6226                 return -ENOMEM;
6227
6228  again:
6229         trace_access_lock(iter->cpu_file);
6230         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6231
6232         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6233                 struct page *page;
6234                 int r;
6235
6236                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6237                 if (!ref) {
6238                         ret = -ENOMEM;
6239                         break;
6240                 }
6241
6242                 ref->ref = 1;
6243                 ref->buffer = iter->trace_buffer->buffer;
6244                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6245                 if (!ref->page) {
6246                         ret = -ENOMEM;
6247                         kfree(ref);
6248                         break;
6249                 }
6250
6251                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6252                                           len, iter->cpu_file, 1);
6253                 if (r < 0) {
6254                         ring_buffer_free_read_page(ref->buffer, ref->page);
6255                         kfree(ref);
6256                         break;
6257                 }
6258
6259                 page = virt_to_page(ref->page);
6260
6261                 spd.pages[i] = page;
6262                 spd.partial[i].len = PAGE_SIZE;
6263                 spd.partial[i].offset = 0;
6264                 spd.partial[i].private = (unsigned long)ref;
6265                 spd.nr_pages++;
6266                 *ppos += PAGE_SIZE;
6267
6268                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6269         }
6270
6271         trace_access_unlock(iter->cpu_file);
6272         spd.nr_pages = i;
6273
6274         /* did we read anything? */
6275         if (!spd.nr_pages) {
6276                 if (ret)
6277                         goto out;
6278
6279                 ret = -EAGAIN;
6280                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6281                         goto out;
6282
6283                 ret = wait_on_pipe(iter, true);
6284                 if (ret)
6285                         goto out;
6286
6287                 goto again;
6288         }
6289
6290         ret = splice_to_pipe(pipe, &spd);
6291 out:
6292         splice_shrink_spd(&spd);
6293
6294         return ret;
6295 }
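
/*
 * tracing_buffers_read() and tracing_buffers_splice_read() above serve
 * the per_cpu/cpuN/trace_pipe_raw files, which expose raw ring-buffer
 * pages (binary sub-buffers) rather than formatted text. Splice offsets
 * and lengths must be page aligned, as checked above. A sketch of use,
 * assuming 4 KiB pages:
 *
 *        # dd if=per_cpu/cpu0/trace_pipe_raw of=/tmp/cpu0.raw bs=4096
 *
 * Tools such as trace-cmd consume this binary format directly.
 */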
6296
6297 static const struct file_operations tracing_buffers_fops = {
6298         .open           = tracing_buffers_open,
6299         .read           = tracing_buffers_read,
6300         .poll           = tracing_buffers_poll,
6301         .release        = tracing_buffers_release,
6302         .splice_read    = tracing_buffers_splice_read,
6303         .llseek         = no_llseek,
6304 };
6305
6306 static ssize_t
6307 tracing_stats_read(struct file *filp, char __user *ubuf,
6308                    size_t count, loff_t *ppos)
6309 {
6310         struct inode *inode = file_inode(filp);
6311         struct trace_array *tr = inode->i_private;
6312         struct trace_buffer *trace_buf = &tr->trace_buffer;
6313         int cpu = tracing_get_cpu(inode);
6314         struct trace_seq *s;
6315         unsigned long cnt;
6316         unsigned long long t;
6317         unsigned long usec_rem;
6318
6319         s = kmalloc(sizeof(*s), GFP_KERNEL);
6320         if (!s)
6321                 return -ENOMEM;
6322
6323         trace_seq_init(s);
6324
6325         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6326         trace_seq_printf(s, "entries: %ld\n", cnt);
6327
6328         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6329         trace_seq_printf(s, "overrun: %ld\n", cnt);
6330
6331         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6332         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6333
6334         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6335         trace_seq_printf(s, "bytes: %ld\n", cnt);
6336
6337         if (trace_clocks[tr->clock_id].in_ns) {
6338                 /* local or global for trace_clock */
6339                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6340                 usec_rem = do_div(t, USEC_PER_SEC);
6341                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6342                                                                 t, usec_rem);
6343
6344                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6345                 usec_rem = do_div(t, USEC_PER_SEC);
6346                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6347         } else {
6348                 /* counter or tsc mode for trace_clock */
6349                 trace_seq_printf(s, "oldest event ts: %llu\n",
6350                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6351
6352                 trace_seq_printf(s, "now ts: %llu\n",
6353                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6354         }
6355
6356         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6357         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6358
6359         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6360         trace_seq_printf(s, "read events: %ld\n", cnt);
6361
6362         count = simple_read_from_buffer(ubuf, count, ppos,
6363                                         s->buffer, trace_seq_used(s));
6364
6365         kfree(s);
6366
6367         return count;
6368 }
6369
6370 static const struct file_operations tracing_stats_fops = {
6371         .open           = tracing_open_generic_tr,
6372         .read           = tracing_stats_read,
6373         .llseek         = generic_file_llseek,
6374         .release        = tracing_release_generic_tr,
6375 };
6376
6377 #ifdef CONFIG_DYNAMIC_FTRACE
6378
6379 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6380 {
6381         return 0;
6382 }
6383
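/*
 * Backs the "dyn_ftrace_total_info" file (created in tracer_init_tracefs()
 * below with ftrace_update_tot_cnt as its data): a read prints that
 * counter followed by whatever ftrace_arch_read_dyn_info() adds.
 */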
6384 static ssize_t
6385 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6386                   size_t cnt, loff_t *ppos)
6387 {
6388         static char ftrace_dyn_info_buffer[1024];
6389         static DEFINE_MUTEX(dyn_info_mutex);
6390         unsigned long *p = filp->private_data;
6391         char *buf = ftrace_dyn_info_buffer;
6392         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6393         int r;
6394
6395         mutex_lock(&dyn_info_mutex);
6396         r = sprintf(buf, "%ld ", *p);
6397
6398         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6399         buf[r++] = '\n';
6400
6401         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6402
6403         mutex_unlock(&dyn_info_mutex);
6404
6405         return r;
6406 }
6407
6408 static const struct file_operations tracing_dyn_info_fops = {
6409         .open           = tracing_open_generic,
6410         .read           = tracing_read_dyn_info,
6411         .llseek         = generic_file_llseek,
6412 };
6413 #endif /* CONFIG_DYNAMIC_FTRACE */
6414
6415 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6416 static void
6417 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6418 {
6419         tracing_snapshot();
6420 }
6421
6422 static void
6423 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6424 {
6425         unsigned long *count = (long *)data;
6426
6427         if (!*count)
6428                 return;
6429
6430         if (*count != -1)
6431                 (*count)--;
6432
6433         tracing_snapshot();
6434 }
6435
6436 static int
6437 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6438                       struct ftrace_probe_ops *ops, void *data)
6439 {
6440         long count = (long)data;
6441
6442         seq_printf(m, "%ps:", (void *)ip);
6443
6444         seq_puts(m, "snapshot");
6445
6446         if (count == -1)
6447                 seq_puts(m, ":unlimited\n");
6448         else
6449                 seq_printf(m, ":count=%ld\n", count);
6450
6451         return 0;
6452 }
6453
6454 static struct ftrace_probe_ops snapshot_probe_ops = {
6455         .func                   = ftrace_snapshot,
6456         .print                  = ftrace_snapshot_print,
6457 };
6458
6459 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6460         .func                   = ftrace_count_snapshot,
6461         .print                  = ftrace_snapshot_print,
6462 };
6463
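/*
 * Handler for the "snapshot" command written to set_ftrace_filter.
 * Illustrative usage (the function name is only an example):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter     # snapshot on every hit
 *   echo 'schedule:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *   echo '!schedule:snapshot' > set_ftrace_filter    # remove the probe
 */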
6464 static int
6465 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6466                                char *glob, char *cmd, char *param, int enable)
6467 {
6468         struct ftrace_probe_ops *ops;
6469         void *count = (void *)-1;
6470         char *number;
6471         int ret;
6472
6473         /* hash funcs only work with set_ftrace_filter */
6474         if (!enable)
6475                 return -EINVAL;
6476
6477         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6478
6479         if (glob[0] == '!') {
6480                 unregister_ftrace_function_probe_func(glob+1, ops);
6481                 return 0;
6482         }
6483
6484         if (!param)
6485                 goto out_reg;
6486
6487         number = strsep(&param, ":");
6488
6489         if (!strlen(number))
6490                 goto out_reg;
6491
6492         /*
6493          * We use the callback data field (which is a pointer)
6494          * as our counter.
6495          */
6496         ret = kstrtoul(number, 0, (unsigned long *)&count);
6497         if (ret)
6498                 return ret;
6499
6500  out_reg:
6501         ret = alloc_snapshot(&global_trace);
6502         if (ret < 0)
6503                 goto out;
6504
6505         ret = register_ftrace_function_probe(glob, ops, count);
6506
6507  out:
6508         return ret < 0 ? ret : 0;
6509 }
6510
6511 static struct ftrace_func_command ftrace_snapshot_cmd = {
6512         .name                   = "snapshot",
6513         .func                   = ftrace_trace_snapshot_callback,
6514 };
6515
6516 static __init int register_snapshot_cmd(void)
6517 {
6518         return register_ftrace_command(&ftrace_snapshot_cmd);
6519 }
6520 #else
6521 static inline __init int register_snapshot_cmd(void) { return 0; }
6522 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6523
6524 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6525 {
6526         if (WARN_ON(!tr->dir))
6527                 return ERR_PTR(-ENODEV);
6528
6529         /* Top directory uses NULL as the parent */
6530         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6531                 return NULL;
6532
6533         /* All sub buffers have a descriptor */
6534         return tr->dir;
6535 }
6536
6537 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6538 {
6539         struct dentry *d_tracer;
6540
6541         if (tr->percpu_dir)
6542                 return tr->percpu_dir;
6543
6544         d_tracer = tracing_get_dentry(tr);
6545         if (IS_ERR(d_tracer))
6546                 return NULL;
6547
6548         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6549
6550         WARN_ONCE(!tr->percpu_dir,
6551                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6552
6553         return tr->percpu_dir;
6554 }
6555
6556 static struct dentry *
6557 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6558                       void *data, long cpu, const struct file_operations *fops)
6559 {
6560         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6561
6562         if (ret) /* See tracing_get_cpu() */
6563                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6564         return ret;
6565 }
6566
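/*
 * Create the per_cpu/cpuN/ directory for one CPU. The files created
 * below mirror their top-level counterparts but operate on a single
 * CPU's buffer, e.g. per_cpu/cpu0/trace_pipe or per_cpu/cpu0/stats.
 */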
6567 static void
6568 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6569 {
6570         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6571         struct dentry *d_cpu;
6572         char cpu_dir[30]; /* 30 characters should be more than enough */
6573
6574         if (!d_percpu)
6575                 return;
6576
6577         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6578         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6579         if (!d_cpu) {
6580                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6581                 return;
6582         }
6583
6584         /* per cpu trace_pipe */
6585         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6586                                 tr, cpu, &tracing_pipe_fops);
6587
6588         /* per cpu trace */
6589         trace_create_cpu_file("trace", 0644, d_cpu,
6590                                 tr, cpu, &tracing_fops);
6591
6592         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6593                                 tr, cpu, &tracing_buffers_fops);
6594
6595         trace_create_cpu_file("stats", 0444, d_cpu,
6596                                 tr, cpu, &tracing_stats_fops);
6597
6598         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6599                                 tr, cpu, &tracing_entries_fops);
6600
6601 #ifdef CONFIG_TRACER_SNAPSHOT
6602         trace_create_cpu_file("snapshot", 0644, d_cpu,
6603                                 tr, cpu, &snapshot_fops);
6604
6605         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6606                                 tr, cpu, &snapshot_raw_fops);
6607 #endif
6608 }
6609
6610 #ifdef CONFIG_FTRACE_SELFTEST
6611 /* Let selftest have access to static functions in this file */
6612 #include "trace_selftest.c"
6613 #endif
6614
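/*
 * Read/write handlers for the tracer-specific flag files that
 * create_trace_option_file() places under the options/ directory.
 * Each file holds a single boolean, e.g. (the option name is
 * tracer dependent):
 *
 *   cat options/<option>
 *   echo 1 > options/<option>
 */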
6615 static ssize_t
6616 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6617                         loff_t *ppos)
6618 {
6619         struct trace_option_dentry *topt = filp->private_data;
6620         char *buf;
6621
6622         if (topt->flags->val & topt->opt->bit)
6623                 buf = "1\n";
6624         else
6625                 buf = "0\n";
6626
6627         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6628 }
6629
6630 static ssize_t
6631 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6632                          loff_t *ppos)
6633 {
6634         struct trace_option_dentry *topt = filp->private_data;
6635         unsigned long val;
6636         int ret;
6637
6638         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6639         if (ret)
6640                 return ret;
6641
6642         if (val != 0 && val != 1)
6643                 return -EINVAL;
6644
6645         if (!!(topt->flags->val & topt->opt->bit) != val) {
6646                 mutex_lock(&trace_types_lock);
6647                 ret = __set_tracer_option(topt->tr, topt->flags,
6648                                           topt->opt, !val);
6649                 mutex_unlock(&trace_types_lock);
6650                 if (ret)
6651                         return ret;
6652         }
6653
6654         *ppos += cnt;
6655
6656         return cnt;
6657 }
6658
6659
6660 static const struct file_operations trace_options_fops = {
6661         .open = tracing_open_generic,
6662         .read = trace_options_read,
6663         .write = trace_options_write,
6664         .llseek = generic_file_llseek,
6665 };
6666
6667 /*
6668  * In order to pass in both the trace_array descriptor as well as the index
6669  * to the flag that the trace option file represents, the trace_array
6670  * has a character array of trace_flags_index[], which holds the index
6671  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6672  * The address of this character array is passed to the flag option file
6673  * read/write callbacks.
6674  *
6675  * In order to extract both the index and the trace_array descriptor,
6676  * get_tr_index() uses the following algorithm.
6677  *
6678  *   idx = *ptr;
6679  *
6680  * Dereferencing the pointer yields the index, because the byte it
6681  * points at stores its own position in the array (index[1] == 1).
6682  *
6683  * Then, to get the trace_array descriptor, subtract that index
6684  * from the pointer to get back to the start of the array:
6685  *
6686  *   ptr - idx == &index[0]
6687  *
6688  * Then a simple container_of() from that pointer gets us to the
6689  * trace_array descriptor.
6690  */
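/*
 * For example (illustrative index value): if data points at
 * tr->trace_flags_index[3], then *pindex becomes 3 and data - 3 is
 * &tr->trace_flags_index[0], from which container_of() recovers the
 * enclosing trace_array.
 */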
6691 static void get_tr_index(void *data, struct trace_array **ptr,
6692                          unsigned int *pindex)
6693 {
6694         *pindex = *(unsigned char *)data;
6695
6696         *ptr = container_of(data - *pindex, struct trace_array,
6697                             trace_flags_index);
6698 }
6699
6700 static ssize_t
6701 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6702                         loff_t *ppos)
6703 {
6704         void *tr_index = filp->private_data;
6705         struct trace_array *tr;
6706         unsigned int index;
6707         char *buf;
6708
6709         get_tr_index(tr_index, &tr, &index);
6710
6711         if (tr->trace_flags & (1 << index))
6712                 buf = "1\n";
6713         else
6714                 buf = "0\n";
6715
6716         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6717 }
6718
6719 static ssize_t
6720 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6721                          loff_t *ppos)
6722 {
6723         void *tr_index = filp->private_data;
6724         struct trace_array *tr;
6725         unsigned int index;
6726         unsigned long val;
6727         int ret;
6728
6729         get_tr_index(tr_index, &tr, &index);
6730
6731         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6732         if (ret)
6733                 return ret;
6734
6735         if (val != 0 && val != 1)
6736                 return -EINVAL;
6737
6738         mutex_lock(&trace_types_lock);
6739         ret = set_tracer_flag(tr, 1 << index, val);
6740         mutex_unlock(&trace_types_lock);
6741
6742         if (ret < 0)
6743                 return ret;
6744
6745         *ppos += cnt;
6746
6747         return cnt;
6748 }
6749
6750 static const struct file_operations trace_options_core_fops = {
6751         .open = tracing_open_generic,
6752         .read = trace_options_core_read,
6753         .write = trace_options_core_write,
6754         .llseek = generic_file_llseek,
6755 };
6756
6757 struct dentry *trace_create_file(const char *name,
6758                                  umode_t mode,
6759                                  struct dentry *parent,
6760                                  void *data,
6761                                  const struct file_operations *fops)
6762 {
6763         struct dentry *ret;
6764
6765         ret = tracefs_create_file(name, mode, parent, data, fops);
6766         if (!ret)
6767                 pr_warn("Could not create tracefs '%s' entry\n", name);
6768
6769         return ret;
6770 }
6771
6772
6773 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6774 {
6775         struct dentry *d_tracer;
6776
6777         if (tr->options)
6778                 return tr->options;
6779
6780         d_tracer = tracing_get_dentry(tr);
6781         if (IS_ERR(d_tracer))
6782                 return NULL;
6783
6784         tr->options = tracefs_create_dir("options", d_tracer);
6785         if (!tr->options) {
6786                 pr_warn("Could not create tracefs directory 'options'\n");
6787                 return NULL;
6788         }
6789
6790         return tr->options;
6791 }
6792
6793 static void
6794 create_trace_option_file(struct trace_array *tr,
6795                          struct trace_option_dentry *topt,
6796                          struct tracer_flags *flags,
6797                          struct tracer_opt *opt)
6798 {
6799         struct dentry *t_options;
6800
6801         t_options = trace_options_init_dentry(tr);
6802         if (!t_options)
6803                 return;
6804
6805         topt->flags = flags;
6806         topt->opt = opt;
6807         topt->tr = tr;
6808
6809         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6810                                     &trace_options_fops);
6811
6812 }
6813
6814 static void
6815 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6816 {
6817         struct trace_option_dentry *topts;
6818         struct trace_options *tr_topts;
6819         struct tracer_flags *flags;
6820         struct tracer_opt *opts;
6821         int cnt;
6822         int i;
6823
6824         if (!tracer)
6825                 return;
6826
6827         flags = tracer->flags;
6828
6829         if (!flags || !flags->opts)
6830                 return;
6831
6832         /*
6833          * If this is an instance, only create flags for tracers
6834          * the instance may have.
6835          */
6836         if (!trace_ok_for_array(tracer, tr))
6837                 return;
6838
6839         for (i = 0; i < tr->nr_topts; i++) {
6840                 /* Make sure there are no duplicate flags. */
6841                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6842                         return;
6843         }
6844
6845         opts = flags->opts;
6846
6847         for (cnt = 0; opts[cnt].name; cnt++)
6848                 ;
6849
6850         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6851         if (!topts)
6852                 return;
6853
6854         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6855                             GFP_KERNEL);
6856         if (!tr_topts) {
6857                 kfree(topts);
6858                 return;
6859         }
6860
6861         tr->topts = tr_topts;
6862         tr->topts[tr->nr_topts].tracer = tracer;
6863         tr->topts[tr->nr_topts].topts = topts;
6864         tr->nr_topts++;
6865
6866         for (cnt = 0; opts[cnt].name; cnt++) {
6867                 create_trace_option_file(tr, &topts[cnt], flags,
6868                                          &opts[cnt]);
6869                 WARN_ONCE(topts[cnt].entry == NULL,
6870                           "Failed to create trace option: %s",
6871                           opts[cnt].name);
6872         }
6873 }
6874
6875 static struct dentry *
6876 create_trace_option_core_file(struct trace_array *tr,
6877                               const char *option, long index)
6878 {
6879         struct dentry *t_options;
6880
6881         t_options = trace_options_init_dentry(tr);
6882         if (!t_options)
6883                 return NULL;
6884
6885         return trace_create_file(option, 0644, t_options,
6886                                  (void *)&tr->trace_flags_index[index],
6887                                  &trace_options_core_fops);
6888 }
6889
6890 static void create_trace_options_dir(struct trace_array *tr)
6891 {
6892         struct dentry *t_options;
6893         bool top_level = tr == &global_trace;
6894         int i;
6895
6896         t_options = trace_options_init_dentry(tr);
6897         if (!t_options)
6898                 return;
6899
6900         for (i = 0; trace_options[i]; i++) {
6901                 if (top_level ||
6902                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6903                         create_trace_option_core_file(tr, trace_options[i], i);
6904         }
6905 }
6906
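/*
 * rb_simple_read()/rb_simple_write() back the "tracing_on" file created
 * in init_tracer_tracefs() below. Reading returns 0 or 1; writing 0
 * stops recording into the ring buffer (and calls the tracer's ->stop()
 * hook if it has one), writing 1 turns recording back on, e.g.:
 *
 *   echo 0 > tracing_on
 */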
6907 static ssize_t
6908 rb_simple_read(struct file *filp, char __user *ubuf,
6909                size_t cnt, loff_t *ppos)
6910 {
6911         struct trace_array *tr = filp->private_data;
6912         char buf[64];
6913         int r;
6914
6915         r = tracer_tracing_is_on(tr);
6916         r = sprintf(buf, "%d\n", r);
6917
6918         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6919 }
6920
6921 static ssize_t
6922 rb_simple_write(struct file *filp, const char __user *ubuf,
6923                 size_t cnt, loff_t *ppos)
6924 {
6925         struct trace_array *tr = filp->private_data;
6926         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6927         unsigned long val;
6928         int ret;
6929
6930         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6931         if (ret)
6932                 return ret;
6933
6934         if (buffer) {
6935                 mutex_lock(&trace_types_lock);
6936                 if (!!val == tracer_tracing_is_on(tr)) {
6937                         val = 0; /* do nothing */
6938                 } else if (val) {
6939                         tracer_tracing_on(tr);
6940                         if (tr->current_trace->start)
6941                                 tr->current_trace->start(tr);
6942                 } else {
6943                         tracer_tracing_off(tr);
6944                         if (tr->current_trace->stop)
6945                                 tr->current_trace->stop(tr);
6946                 }
6947                 mutex_unlock(&trace_types_lock);
6948         }
6949
6950         (*ppos)++;
6951
6952         return cnt;
6953 }
6954
6955 static const struct file_operations rb_simple_fops = {
6956         .open           = tracing_open_generic_tr,
6957         .read           = rb_simple_read,
6958         .write          = rb_simple_write,
6959         .release        = tracing_release_generic_tr,
6960         .llseek         = default_llseek,
6961 };
6962
6963 struct dentry *trace_instance_dir;
6964
6965 static void
6966 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6967
6968 static int
6969 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6970 {
6971         enum ring_buffer_flags rb_flags;
6972
6973         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6974
6975         buf->tr = tr;
6976
6977         buf->buffer = ring_buffer_alloc(size, rb_flags);
6978         if (!buf->buffer)
6979                 return -ENOMEM;
6980
6981         buf->data = alloc_percpu(struct trace_array_cpu);
6982         if (!buf->data) {
6983                 ring_buffer_free(buf->buffer);
6984                 buf->buffer = NULL;
6985                 return -ENOMEM;
6986         }
6987
6988         /* Allocate the first page for all buffers */
6989         set_buffer_entries(&tr->trace_buffer,
6990                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6991
6992         return 0;
6993 }
6994
6995 static int allocate_trace_buffers(struct trace_array *tr, int size)
6996 {
6997         int ret;
6998
6999         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7000         if (ret)
7001                 return ret;
7002
7003 #ifdef CONFIG_TRACER_MAX_TRACE
7004         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7005                                     allocate_snapshot ? size : 1);
7006         if (WARN_ON(ret)) {
7007                 ring_buffer_free(tr->trace_buffer.buffer);
7008                 tr->trace_buffer.buffer = NULL;
7009                 free_percpu(tr->trace_buffer.data);
7010                 tr->trace_buffer.data = NULL;
7011                 return -ENOMEM;
7012         }
7013         tr->allocated_snapshot = allocate_snapshot;
7014
7015         /*
7016          * Only the top level trace array gets its snapshot allocated
7017          * from the kernel command line.
7018          */
7019         allocate_snapshot = false;
7020 #endif
7021
7022         /*
7023          * Because of the way alloc_percpu() works on x86_64, we need to
7024          * synchronize the pgd of all the page tables; otherwise a trace
7025          * event that fires in the x86_64 page fault handler may itself
7026          * fault when it touches alloc_percpu()'d memory that has not yet
7027          * been mapped into the current pgd.
7028          * We also need to audit all other alloc_percpu() and vmalloc()
7029          * calls in tracing, because something might get triggered within
7030          * a page fault trace event!
7031          */
7032         vmalloc_sync_mappings();
7033
7034         return 0;
7035 }
7036
7037 static void free_trace_buffer(struct trace_buffer *buf)
7038 {
7039         if (buf->buffer) {
7040                 ring_buffer_free(buf->buffer);
7041                 buf->buffer = NULL;
7042                 free_percpu(buf->data);
7043                 buf->data = NULL;
7044         }
7045 }
7046
7047 static void free_trace_buffers(struct trace_array *tr)
7048 {
7049         if (!tr)
7050                 return;
7051
7052         free_trace_buffer(&tr->trace_buffer);
7053
7054 #ifdef CONFIG_TRACER_MAX_TRACE
7055         free_trace_buffer(&tr->max_buffer);
7056 #endif
7057 }
7058
7059 static void init_trace_flags_index(struct trace_array *tr)
7060 {
7061         int i;
7062
7063         /* Used by the trace options files */
7064         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7065                 tr->trace_flags_index[i] = i;
7066 }
7067
7068 static void __update_tracer_options(struct trace_array *tr)
7069 {
7070         struct tracer *t;
7071
7072         for (t = trace_types; t; t = t->next)
7073                 add_tracer_options(tr, t);
7074 }
7075
7076 static void update_tracer_options(struct trace_array *tr)
7077 {
7078         mutex_lock(&trace_types_lock);
7079         __update_tracer_options(tr);
7080         mutex_unlock(&trace_types_lock);
7081 }
7082
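/*
 * instance_mkdir()/instance_rmdir() are the mkdir/rmdir callbacks for
 * the "instances" directory (see create_trace_instances() below), so a
 * new trace_array with its own buffers and event files can be created
 * from user space, e.g. (path depends on where tracefs is mounted):
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo
 *   rmdir /sys/kernel/debug/tracing/instances/foo
 */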
7083 static int instance_mkdir(const char *name)
7084 {
7085         struct trace_array *tr;
7086         int ret;
7087
7088         mutex_lock(&trace_types_lock);
7089
7090         ret = -EEXIST;
7091         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7092                 if (tr->name && strcmp(tr->name, name) == 0)
7093                         goto out_unlock;
7094         }
7095
7096         ret = -ENOMEM;
7097         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7098         if (!tr)
7099                 goto out_unlock;
7100
7101         tr->name = kstrdup(name, GFP_KERNEL);
7102         if (!tr->name)
7103                 goto out_free_tr;
7104
7105         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7106                 goto out_free_tr;
7107
7108         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7109
7110         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7111
7112         raw_spin_lock_init(&tr->start_lock);
7113
7114         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7115
7116         tr->current_trace = &nop_trace;
7117
7118         INIT_LIST_HEAD(&tr->systems);
7119         INIT_LIST_HEAD(&tr->events);
7120
7121         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7122                 goto out_free_tr;
7123
7124         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7125         if (!tr->dir)
7126                 goto out_free_tr;
7127
7128         ret = event_trace_add_tracer(tr->dir, tr);
7129         if (ret) {
7130                 tracefs_remove_recursive(tr->dir);
7131                 goto out_free_tr;
7132         }
7133
7134         init_tracer_tracefs(tr, tr->dir);
7135         init_trace_flags_index(tr);
7136         __update_tracer_options(tr);
7137
7138         list_add(&tr->list, &ftrace_trace_arrays);
7139
7140         mutex_unlock(&trace_types_lock);
7141
7142         return 0;
7143
7144  out_free_tr:
7145         free_trace_buffers(tr);
7146         free_cpumask_var(tr->tracing_cpumask);
7147         kfree(tr->name);
7148         kfree(tr);
7149
7150  out_unlock:
7151         mutex_unlock(&trace_types_lock);
7152
7153         return ret;
7154
7155 }
7156
7157 static int instance_rmdir(const char *name)
7158 {
7159         struct trace_array *tr;
7160         int found = 0;
7161         int ret;
7162         int i;
7163
7164         mutex_lock(&trace_types_lock);
7165
7166         ret = -ENODEV;
7167         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7168                 if (tr->name && strcmp(tr->name, name) == 0) {
7169                         found = 1;
7170                         break;
7171                 }
7172         }
7173         if (!found)
7174                 goto out_unlock;
7175
7176         ret = -EBUSY;
7177         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7178                 goto out_unlock;
7179
7180         list_del(&tr->list);
7181
7182         /* Disable all the flags that were enabled coming in */
7183         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7184                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7185                         set_tracer_flag(tr, 1 << i, 0);
7186         }
7187
7188         tracing_set_nop(tr);
7189         event_trace_del_tracer(tr);
7190         ftrace_clear_pids(tr);
7191         ftrace_destroy_function_files(tr);
7192         tracefs_remove_recursive(tr->dir);
7193         free_trace_buffers(tr);
7194
7195         for (i = 0; i < tr->nr_topts; i++) {
7196                 kfree(tr->topts[i].topts);
7197         }
7198         kfree(tr->topts);
7199
7200         free_cpumask_var(tr->tracing_cpumask);
7201         kfree(tr->name);
7202         kfree(tr);
7203
7204         ret = 0;
7205
7206  out_unlock:
7207         mutex_unlock(&trace_types_lock);
7208
7209         return ret;
7210 }
7211
7212 static __init void create_trace_instances(struct dentry *d_tracer)
7213 {
7214         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7215                                                          instance_mkdir,
7216                                                          instance_rmdir);
7217         if (WARN_ON(!trace_instance_dir))
7218                 return;
7219 }
7220
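/*
 * Populate d_tracer (the top-level tracing directory or an instance
 * directory) with the standard control files: available_tracers,
 * current_tracer, trace, trace_pipe, buffer_size_kb, trace_marker,
 * trace_clock, tracing_on, the options/ and per_cpu/ directories,
 * and so on.
 */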
7221 static void
7222 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7223 {
7224         int cpu;
7225
7226         trace_create_file("available_tracers", 0444, d_tracer,
7227                         tr, &show_traces_fops);
7228
7229         trace_create_file("current_tracer", 0644, d_tracer,
7230                         tr, &set_tracer_fops);
7231
7232         trace_create_file("tracing_cpumask", 0644, d_tracer,
7233                           tr, &tracing_cpumask_fops);
7234
7235         trace_create_file("trace_options", 0644, d_tracer,
7236                           tr, &tracing_iter_fops);
7237
7238         trace_create_file("trace", 0644, d_tracer,
7239                           tr, &tracing_fops);
7240
7241         trace_create_file("trace_pipe", 0444, d_tracer,
7242                           tr, &tracing_pipe_fops);
7243
7244         trace_create_file("buffer_size_kb", 0644, d_tracer,
7245                           tr, &tracing_entries_fops);
7246
7247         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7248                           tr, &tracing_total_entries_fops);
7249
7250         trace_create_file("free_buffer", 0200, d_tracer,
7251                           tr, &tracing_free_buffer_fops);
7252
7253         trace_create_file("trace_marker", 0220, d_tracer,
7254                           tr, &tracing_mark_fops);
7255
7256         trace_create_file("trace_clock", 0644, d_tracer, tr,
7257                           &trace_clock_fops);
7258
7259         trace_create_file("tracing_on", 0644, d_tracer,
7260                           tr, &rb_simple_fops);
7261
7262         create_trace_options_dir(tr);
7263
7264 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7265         trace_create_file("tracing_max_latency", 0644, d_tracer,
7266                         &tr->max_latency, &tracing_max_lat_fops);
7267 #endif
7268
7269         if (ftrace_create_function_files(tr, d_tracer))
7270                 WARN(1, "Could not allocate function filter files");
7271
7272 #ifdef CONFIG_TRACER_SNAPSHOT
7273         trace_create_file("snapshot", 0644, d_tracer,
7274                           tr, &snapshot_fops);
7275 #endif
7276
7277         for_each_tracing_cpu(cpu)
7278                 tracing_init_tracefs_percpu(tr, cpu);
7279
7280         ftrace_init_tracefs(tr, d_tracer);
7281 }
7282
7283 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7284 {
7285         struct vfsmount *mnt;
7286         struct file_system_type *type;
7287
7288         /*
7289          * To maintain backward compatibility for tools that mount
7290          * debugfs to get to the tracing facility, tracefs is automatically
7291          * mounted to the debugfs/tracing directory.
7292          */
7293         type = get_fs_type("tracefs");
7294         if (!type)
7295                 return NULL;
7296         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7297         put_filesystem(type);
7298         if (IS_ERR(mnt))
7299                 return NULL;
7300         mntget(mnt);
7301
7302         return mnt;
7303 }
7304
7305 /**
7306  * tracing_init_dentry - initialize top level trace array
7307  *
7308  * This is called when creating files or directories in the tracing
7309  * directory. It is called via fs_initcall() from the boot-up code and
7310  * is expected to return the dentry of the top level tracing directory.
7311  */
7312 struct dentry *tracing_init_dentry(void)
7313 {
7314         struct trace_array *tr = &global_trace;
7315
7316         /* The top level trace array uses NULL as parent */
7317         if (tr->dir)
7318                 return NULL;
7319
7320         if (WARN_ON(!tracefs_initialized()) ||
7321                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7322                  WARN_ON(!debugfs_initialized())))
7323                 return ERR_PTR(-ENODEV);
7324
7325         /*
7326          * As there may still be users that expect the tracing
7327          * files to exist in debugfs/tracing, we must automount
7328          * the tracefs file system there, so older tools still
7329          * work with the newer kernel.
7330          */
7331         tr->dir = debugfs_create_automount("tracing", NULL,
7332                                            trace_automount, NULL);
7333         if (!tr->dir) {
7334                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7335                 return ERR_PTR(-ENOMEM);
7336         }
7337
7338         return NULL;
7339 }
7340
7341 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7342 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7343
7344 static void __init trace_enum_init(void)
7345 {
7346         int len;
7347
7348         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7349         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7350 }
7351
7352 #ifdef CONFIG_MODULES
7353 static void trace_module_add_enums(struct module *mod)
7354 {
7355         if (!mod->num_trace_enums)
7356                 return;
7357
7358         /*
7359          * Modules with bad taint do not have events created; do
7360          * not bother with enums either.
7361          */
7362         if (trace_module_has_bad_taint(mod))
7363                 return;
7364
7365         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7366 }
7367
7368 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7369 static void trace_module_remove_enums(struct module *mod)
7370 {
7371         union trace_enum_map_item *map;
7372         union trace_enum_map_item **last = &trace_enum_maps;
7373
7374         if (!mod->num_trace_enums)
7375                 return;
7376
7377         mutex_lock(&trace_enum_mutex);
7378
7379         map = trace_enum_maps;
7380
7381         while (map) {
7382                 if (map->head.mod == mod)
7383                         break;
7384                 map = trace_enum_jmp_to_tail(map);
7385                 last = &map->tail.next;
7386                 map = map->tail.next;
7387         }
7388         if (!map)
7389                 goto out;
7390
7391         *last = trace_enum_jmp_to_tail(map)->tail.next;
7392         kfree(map);
7393  out:
7394         mutex_unlock(&trace_enum_mutex);
7395 }
7396 #else
7397 static inline void trace_module_remove_enums(struct module *mod) { }
7398 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7399
7400 static int trace_module_notify(struct notifier_block *self,
7401                                unsigned long val, void *data)
7402 {
7403         struct module *mod = data;
7404
7405         switch (val) {
7406         case MODULE_STATE_COMING:
7407                 trace_module_add_enums(mod);
7408                 break;
7409         case MODULE_STATE_GOING:
7410                 trace_module_remove_enums(mod);
7411                 break;
7412         }
7413
7414         return 0;
7415 }
7416
7417 static struct notifier_block trace_module_nb = {
7418         .notifier_call = trace_module_notify,
7419         .priority = 0,
7420 };
7421 #endif /* CONFIG_MODULES */
7422
7423 static __init int tracer_init_tracefs(void)
7424 {
7425         struct dentry *d_tracer;
7426
7427         trace_access_lock_init();
7428
7429         d_tracer = tracing_init_dentry();
7430         if (IS_ERR(d_tracer))
7431                 return 0;
7432
7433         init_tracer_tracefs(&global_trace, d_tracer);
7434         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7435
7436         trace_create_file("tracing_thresh", 0644, d_tracer,
7437                         &global_trace, &tracing_thresh_fops);
7438
7439         trace_create_file("README", 0444, d_tracer,
7440                         NULL, &tracing_readme_fops);
7441
7442         trace_create_file("saved_cmdlines", 0444, d_tracer,
7443                         NULL, &tracing_saved_cmdlines_fops);
7444
7445         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7446                           NULL, &tracing_saved_cmdlines_size_fops);
7447
7448         trace_enum_init();
7449
7450         trace_create_enum_file(d_tracer);
7451
7452 #ifdef CONFIG_MODULES
7453         register_module_notifier(&trace_module_nb);
7454 #endif
7455
7456 #ifdef CONFIG_DYNAMIC_FTRACE
7457         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7458                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7459 #endif
7460
7461         create_trace_instances(d_tracer);
7462
7463         update_tracer_options(&global_trace);
7464
7465         return 0;
7466 }
7467
7468 static int trace_panic_handler(struct notifier_block *this,
7469                                unsigned long event, void *unused)
7470 {
7471         if (ftrace_dump_on_oops)
7472                 ftrace_dump(ftrace_dump_on_oops);
7473         return NOTIFY_OK;
7474 }
7475
7476 static struct notifier_block trace_panic_notifier = {
7477         .notifier_call  = trace_panic_handler,
7478         .next           = NULL,
7479         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7480 };
7481
7482 static int trace_die_handler(struct notifier_block *self,
7483                              unsigned long val,
7484                              void *data)
7485 {
7486         switch (val) {
7487         case DIE_OOPS:
7488                 if (ftrace_dump_on_oops)
7489                         ftrace_dump(ftrace_dump_on_oops);
7490                 break;
7491         default:
7492                 break;
7493         }
7494         return NOTIFY_OK;
7495 }
7496
7497 static struct notifier_block trace_die_notifier = {
7498         .notifier_call = trace_die_handler,
7499         .priority = 200
7500 };
7501
7502 /*
7503  * printk is limited to a max of 1024 bytes; we really don't need it that big.
7504  * Nothing should be printing 1000 characters anyway.
7505  */
7506 #define TRACE_MAX_PRINT         1000
7507
7508 /*
7509  * Define here KERN_TRACE so that we have one place to modify
7510  * it if we decide to change what log level the ftrace dump
7511  * should be at.
7512  */
7513 #define KERN_TRACE              KERN_EMERG
7514
7515 void
7516 trace_printk_seq(struct trace_seq *s)
7517 {
7518         /* Probably should print a warning here. */
7519         if (s->seq.len >= TRACE_MAX_PRINT)
7520                 s->seq.len = TRACE_MAX_PRINT;
7521
7522         /*
7523          * More paranoid code. Although the buffer size is set to
7524          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7525          * an extra layer of protection.
7526          */
7527         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7528                 s->seq.len = s->seq.size - 1;
7529
7530         /* should already be zero terminated, but we are paranoid. */
7531         s->buffer[s->seq.len] = 0;
7532
7533         printk(KERN_TRACE "%s", s->buffer);
7534
7535         trace_seq_init(s);
7536 }
7537
7538 void trace_init_global_iter(struct trace_iterator *iter)
7539 {
7540         iter->tr = &global_trace;
7541         iter->trace = iter->tr->current_trace;
7542         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7543         iter->trace_buffer = &global_trace.trace_buffer;
7544
7545         if (iter->trace && iter->trace->open)
7546                 iter->trace->open(iter);
7547
7548         /* Annotate start of buffers if we had overruns */
7549         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7550                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7551
7552         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7553         if (trace_clocks[iter->tr->clock_id].in_ns)
7554                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7555 }
7556
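/*
 * ftrace_dump - dump the ring buffer contents to the console
 *
 * Called from the panic and die notifiers above when ftrace_dump_on_oops
 * is set, and from sysrq-z. Tracing is turned off and per-cpu recording
 * is disabled for the duration of the dump.
 */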
7557 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7558 {
7559         /* use static because iter can be a bit big for the stack */
7560         static struct trace_iterator iter;
7561         static atomic_t dump_running;
7562         struct trace_array *tr = &global_trace;
7563         unsigned int old_userobj;
7564         unsigned long flags;
7565         int cnt = 0, cpu;
7566
7567         /* Only allow one dump user at a time. */
7568         if (atomic_inc_return(&dump_running) != 1) {
7569                 atomic_dec(&dump_running);
7570                 return;
7571         }
7572
7573         /*
7574          * Always turn off tracing when we dump.
7575          * We don't need to show trace output of what happens
7576          * between multiple crashes.
7577          *
7578          * If the user does a sysrq-z, then they can re-enable
7579          * tracing with echo 1 > tracing_on.
7580          */
7581         tracing_off();
7582
7583         local_irq_save(flags);
7584
7585         /* Simulate the iterator */
7586         trace_init_global_iter(&iter);
7587
7588         for_each_tracing_cpu(cpu) {
7589                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7590         }
7591
7592         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7593
7594         /* don't look at user memory in panic mode */
7595         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7596
7597         switch (oops_dump_mode) {
7598         case DUMP_ALL:
7599                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7600                 break;
7601         case DUMP_ORIG:
7602                 iter.cpu_file = raw_smp_processor_id();
7603                 break;
7604         case DUMP_NONE:
7605                 goto out_enable;
7606         default:
7607                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7608                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7609         }
7610
7611         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7612
7613         /* Did function tracer already get disabled? */
7614         if (ftrace_is_dead()) {
7615                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7616                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7617         }
7618
7619         /*
7620          * We need to stop all tracing on all CPUs to read
7621          * the next buffer. This is a bit expensive, but it is
7622          * not done often. We fill in all that we can read,
7623          * and then release the locks again.
7624          */
7625
7626         while (!trace_empty(&iter)) {
7627
7628                 if (!cnt)
7629                         printk(KERN_TRACE "---------------------------------\n");
7630
7631                 cnt++;
7632
7633                 trace_iterator_reset(&iter);
7634                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7635
7636                 if (trace_find_next_entry_inc(&iter) != NULL) {
7637                         int ret;
7638
7639                         ret = print_trace_line(&iter);
7640                         if (ret != TRACE_TYPE_NO_CONSUME)
7641                                 trace_consume(&iter);
7642                 }
7643                 touch_nmi_watchdog();
7644
7645                 trace_printk_seq(&iter.seq);
7646         }
7647
7648         if (!cnt)
7649                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7650         else
7651                 printk(KERN_TRACE "---------------------------------\n");
7652
7653  out_enable:
7654         tr->trace_flags |= old_userobj;
7655
7656         for_each_tracing_cpu(cpu) {
7657                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7658         }
7659         atomic_dec(&dump_running);
7660         local_irq_restore(flags);
7661 }
7662 EXPORT_SYMBOL_GPL(ftrace_dump);
7663
7664 __init static int tracer_alloc_buffers(void)
7665 {
7666         int ring_buf_size;
7667         int ret = -ENOMEM;
7668
7669         /*
7670          * Make sure we don't accidentally add more trace options
7671          * than we have bits for.
7672          */
7673         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7674
7675         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7676                 goto out;
7677
7678         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7679                 goto out_free_buffer_mask;
7680
7681         /* Only allocate trace_printk buffers if a trace_printk exists */
7682         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7683                 /* Must be called before global_trace.buffer is allocated */
7684                 trace_printk_init_buffers();
7685
7686         /* To save memory, keep the ring buffer size to its minimum */
7687         if (ring_buffer_expanded)
7688                 ring_buf_size = trace_buf_size;
7689         else
7690                 ring_buf_size = 1;
7691
7692         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7693         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7694
7695         raw_spin_lock_init(&global_trace.start_lock);
7696
7697         /* Used for event triggers */
7698         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7699         if (!temp_buffer)
7700                 goto out_free_cpumask;
7701
7702         if (trace_create_savedcmd() < 0)
7703                 goto out_free_temp_buffer;
7704
7705         /* TODO: make the number of buffers hot pluggable with CPUs */
7706         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7707                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7708                 WARN_ON(1);
7709                 goto out_free_savedcmd;
7710         }
7711
7712         if (global_trace.buffer_disabled)
7713                 tracing_off();
7714
7715         if (trace_boot_clock) {
7716                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7717                 if (ret < 0)
7718                         pr_warn("Trace clock %s not defined, going back to default\n",
7719                                 trace_boot_clock);
7720         }
7721
7722         /*
7723          * register_tracer() might reference current_trace, so it
7724          * needs to be set before we register anything. This is
7725          * just a bootstrap of current_trace anyway.
7726          */
7727         global_trace.current_trace = &nop_trace;
7728
7729         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7730
7731         ftrace_init_global_array_ops(&global_trace);
7732
7733         init_trace_flags_index(&global_trace);
7734
7735         register_tracer(&nop_trace);
7736
7737         /* All seems OK, enable tracing */
7738         tracing_disabled = 0;
7739
7740         atomic_notifier_chain_register(&panic_notifier_list,
7741                                        &trace_panic_notifier);
7742
7743         register_die_notifier(&trace_die_notifier);
7744
7745         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7746
7747         INIT_LIST_HEAD(&global_trace.systems);
7748         INIT_LIST_HEAD(&global_trace.events);
7749         list_add(&global_trace.list, &ftrace_trace_arrays);
7750
7751         apply_trace_boot_options();
7752
7753         register_snapshot_cmd();
7754
7755         return 0;
7756
7757 out_free_savedcmd:
7758         free_saved_cmdlines_buffer(savedcmd);
7759 out_free_temp_buffer:
7760         ring_buffer_free(temp_buffer);
7761 out_free_cpumask:
7762         free_cpumask_var(global_trace.tracing_cpumask);
7763 out_free_buffer_mask:
7764         free_cpumask_var(tracing_buffer_mask);
7765 out:
7766         return ret;
7767 }
7768
7769 void __init trace_init(void)
7770 {
7771         if (tracepoint_printk) {
7772                 tracepoint_print_iter =
7773                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7774                 if (WARN_ON(!tracepoint_print_iter))
7775                         tracepoint_printk = 0;
7776         }
7777         tracer_alloc_buffers();
7778         trace_event_init();
7779 }
7780
7781 __init static int clear_boot_tracer(void)
7782 {
7783         /*
7784          * The default boot-up tracer name points into an init section
7785          * that is freed after boot. This function runs as a late
7786          * initcall; if the boot tracer was never found, clear the
7787          * pointer so that a later tracer registration does not access
7788          * the buffer after it has been freed.
7789          */
7790         if (!default_bootup_tracer)
7791                 return 0;
7792
7793         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7794                default_bootup_tracer);
7795         default_bootup_tracer = NULL;
7796
7797         return 0;
7798 }
7799
7800 fs_initcall(tracer_init_tracefs);
7801 late_initcall_sync(clear_boot_tracer);