GNU Linux-libre 4.4.289-gnu1
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72
73 /* For tracers that don't implement custom flags */
74 static struct tracer_opt dummy_tracer_opt[] = {
75         { }
76 };
77
78 static struct tracer_flags dummy_tracer_flags = {
79         .val = 0,
80         .opts = dummy_tracer_opt
81 };
82
83 static int
84 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
85 {
86         return 0;
87 }
88
89 /*
90  * To prevent the comm cache from being overwritten when no
91  * tracing is active, only save the comm when a trace event
92  * occurred.
93  */
94 static DEFINE_PER_CPU(bool, trace_cmdline_save);
95
96 /*
97  * Kill all tracing for good (never come back).
98  * It is initialized to 1, and is set to zero when the initialization
99  * of the tracer succeeds. That is the only place that ever sets
100  * it back to zero.
101  */
102 static int tracing_disabled = 1;
103
104 cpumask_var_t __read_mostly     tracing_buffer_mask;
105
106 /*
107  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
108  *
109  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
110  * is set, then ftrace_dump is called. This will output the contents
111  * of the ftrace buffers to the console.  This is very useful for
112  * capturing traces that lead to crashes and outputting them to a
113  * serial console.
114  *
115  * It is off by default, but you can enable it either by specifying
116  * "ftrace_dump_on_oops" on the kernel command line, or by setting
117  * /proc/sys/kernel/ftrace_dump_on_oops
118  * Set it to 1 if you want to dump the buffers of all CPUs
119  * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops
120  */
121
122 enum ftrace_dump_mode ftrace_dump_on_oops;
123
124 /* When set, tracing will stop when a WARN*() is hit */
125 int __disable_trace_on_warning;
126
127 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
128 /* Map of enums to their values, for "enum_map" file */
129 struct trace_enum_map_head {
130         struct module                   *mod;
131         unsigned long                   length;
132 };
133
134 union trace_enum_map_item;
135
136 struct trace_enum_map_tail {
137         /*
138          * "end" is first and points to NULL as it must be different
139          * than "mod" or "enum_string"
140          */
141         union trace_enum_map_item       *next;
142         const char                      *end;   /* points to NULL */
143 };
144
145 static DEFINE_MUTEX(trace_enum_mutex);
146
147 /*
148  * The trace_enum_maps are saved in an array with two extra elements,
149  * one at the beginning, and one at the end. The beginning item contains
150  * the count of the saved maps (head.length), and the module they
151  * belong to if not built in (head.mod). The ending item contains a
152  * pointer to the next array of saved enum_map items.
153  */
154 union trace_enum_map_item {
155         struct trace_enum_map           map;
156         struct trace_enum_map_head      head;
157         struct trace_enum_map_tail      tail;
158 };
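
/*
 * Illustrative sketch (not part of the original source): with two saved
 * maps that belong to a module, the array described above would look
 * roughly like:
 *
 *	item[0].head = { .mod = <module>, .length = 2 }
 *	item[1].map  = first saved trace_enum_map
 *	item[2].map  = second saved trace_enum_map
 *	item[3].tail = { .next = <next saved array, or NULL>, .end = NULL }
 *
 * The NULL "end" pointer is what distinguishes the tail item from a real
 * map entry when the array is walked.
 */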
159
160 static union trace_enum_map_item *trace_enum_maps;
161 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
162
163 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
164
165 #define MAX_TRACER_SIZE         100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168
169 static bool allocate_snapshot;
170
171 static int __init set_cmdline_ftrace(char *str)
172 {
173         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174         default_bootup_tracer = bootup_tracer_buf;
175         /* We are using ftrace early, expand it */
176         ring_buffer_expanded = true;
177         return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183         if (*str++ != '=' || !*str) {
184                 ftrace_dump_on_oops = DUMP_ALL;
185                 return 1;
186         }
187
188         if (!strcmp("orig_cpu", str)) {
189                 ftrace_dump_on_oops = DUMP_ORIG;
190                 return 1;
191         }
192
193         return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
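
/*
 * Example (illustrative only): the parser above accepts the following
 * forms on the kernel command line:
 *
 *	ftrace_dump_on_oops		-> DUMP_ALL  (dump every CPU buffer)
 *	ftrace_dump_on_oops=orig_cpu	-> DUMP_ORIG (only the CPU that oopsed)
 *
 * Any other "=value" suffix is rejected and leaves the default (off).
 */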
196
197 static int __init stop_trace_on_warning(char *str)
198 {
199         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200                 __disable_trace_on_warning = 1;
201         return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204
205 static int __init boot_alloc_snapshot(char *str)
206 {
207         allocate_snapshot = true;
208         /* We also need the main ring buffer expanded */
209         ring_buffer_expanded = true;
210         return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213
214
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216
217 static int __init set_trace_boot_options(char *str)
218 {
219         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220         return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226
227 static int __init set_trace_boot_clock(char *str)
228 {
229         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230         trace_boot_clock = trace_boot_clock_buf;
231         return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234
235 static int __init set_tracepoint_printk(char *str)
236 {
237         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238                 tracepoint_printk = 1;
239         return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242
243 unsigned long long ns2usecs(cycle_t nsec)
244 {
245         nsec += 500;
246         do_div(nsec, 1000);
247         return nsec;
248 }
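
/*
 * Example: ns2usecs(1500) returns 2; adding 500 before the divide rounds
 * to the nearest microsecond instead of truncating ((1500 + 500) / 1000 = 2).
 */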
249
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS                                             \
252         (FUNCTION_DEFAULT_FLAGS |                                       \
253          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
254          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
255          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
256          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
260                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261
262
263 /*
264  * The global_trace is the descriptor that holds the tracing
265  * buffers for the live tracing. For each CPU, it contains
266  * a linked list of pages that will store trace entries. The
267  * page descriptors of the pages in memory are used to hold
268  * the linked list, by linking the lru item in each page descriptor
269  * to each of the pages in that CPU's buffer.
270  *
271  * For each active CPU there is a data field that holds the
272  * pages for the buffer for that CPU. Each CPU has the same number
273  * of pages allocated for its buffer.
274  */
275 static struct trace_array global_trace = {
276         .trace_flags = TRACE_DEFAULT_FLAGS,
277 };
278
279 LIST_HEAD(ftrace_trace_arrays);
280
281 int trace_array_get(struct trace_array *this_tr)
282 {
283         struct trace_array *tr;
284         int ret = -ENODEV;
285
286         mutex_lock(&trace_types_lock);
287         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
288                 if (tr == this_tr) {
289                         tr->ref++;
290                         ret = 0;
291                         break;
292                 }
293         }
294         mutex_unlock(&trace_types_lock);
295
296         return ret;
297 }
298
299 static void __trace_array_put(struct trace_array *this_tr)
300 {
301         WARN_ON(!this_tr->ref);
302         this_tr->ref--;
303 }
304
305 void trace_array_put(struct trace_array *this_tr)
306 {
307         mutex_lock(&trace_types_lock);
308         __trace_array_put(this_tr);
309         mutex_unlock(&trace_types_lock);
310 }
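
/*
 * Illustrative usage sketch (not part of the original source): a caller
 * that needs to keep a trace_array from going away while using it pairs
 * the two helpers above:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;		(tr is not on ftrace_trace_arrays)
 *	... use tr ...
 *	trace_array_put(tr);
 */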
311
312 int filter_check_discard(struct trace_event_file *file, void *rec,
313                          struct ring_buffer *buffer,
314                          struct ring_buffer_event *event)
315 {
316         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
317             !filter_match_preds(file->filter, rec)) {
318                 ring_buffer_discard_commit(buffer, event);
319                 return 1;
320         }
321
322         return 0;
323 }
324 EXPORT_SYMBOL_GPL(filter_check_discard);
325
326 int call_filter_check_discard(struct trace_event_call *call, void *rec,
327                               struct ring_buffer *buffer,
328                               struct ring_buffer_event *event)
329 {
330         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
331             !filter_match_preds(call->filter, rec)) {
332                 ring_buffer_discard_commit(buffer, event);
333                 return 1;
334         }
335
336         return 0;
337 }
338 EXPORT_SYMBOL_GPL(call_filter_check_discard);
339
340 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
341 {
342         u64 ts;
343
344         /* Early boot up does not have a buffer yet */
345         if (!buf->buffer)
346                 return trace_clock_local();
347
348         ts = ring_buffer_time_stamp(buf->buffer, cpu);
349         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
350
351         return ts;
352 }
353
354 cycle_t ftrace_now(int cpu)
355 {
356         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
357 }
358
359 /**
360  * tracing_is_enabled - Show if global_trace has been enabled
361  *
362  * Shows if the global trace has been enabled or not. It uses the
363  * mirror flag "buffer_disabled" so that it can be used in fast paths
364  * such as for the irqsoff tracer. But it may be inaccurate due to races. If you
365  * need to know the accurate state, use tracing_is_on() which is a little
366  * slower, but accurate.
367  */
368 int tracing_is_enabled(void)
369 {
370         /*
371          * For quick access (irqsoff uses this in fast path), just
372          * return the mirror variable of the state of the ring buffer.
373          * It's a little racy, but we don't really care.
374          */
375         smp_rmb();
376         return !global_trace.buffer_disabled;
377 }
378
379 /*
380  * trace_buf_size is the size in bytes that is allocated
381  * for a buffer. Note, the number of bytes is always rounded
382  * to page size.
383  *
384  * This number is purposely set to a low number of 16384.
385  * If a dump on oops happens, it is much appreciated not to have
386  * to wait for all that output. In any case, this is configurable
387  * at both boot time and run time.
388  */
389 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
390
391 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
392
393 /* trace_types holds a linked list of available tracers. */
394 static struct tracer            *trace_types __read_mostly;
395
396 /*
397  * trace_types_lock is used to protect the trace_types list.
398  */
399 DEFINE_MUTEX(trace_types_lock);
400
401 /*
402  * serialize the access of the ring buffer
403  *
404  * The ring buffer serializes readers, but that is only low level protection.
405  * The validity of the events (returned by ring_buffer_peek() etc.)
406  * is not protected by the ring buffer.
407  *
408  * The content of events may become garbage if we allow other processes to
409  * consume these events concurrently:
410  *   A) the page of the consumed events may become a normal page
411  *      (not a reader page) in the ring buffer, and this page will be
412  *      rewritten by the event producer.
413  *   B) the page of the consumed events may become a page for splice_read,
414  *      and this page will be returned to the system.
415  *
416  * These primitives allow multiple processes to access different cpu ring
417  * buffers concurrently.
418  *
419  * These primitives don't distinguish read-only and read-consume access.
420  * Multiple read-only accesses are also serialized.
421  */
422
423 #ifdef CONFIG_SMP
424 static DECLARE_RWSEM(all_cpu_access_lock);
425 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
426
427 static inline void trace_access_lock(int cpu)
428 {
429         if (cpu == RING_BUFFER_ALL_CPUS) {
430                 /* gain it for accessing the whole ring buffer. */
431                 down_write(&all_cpu_access_lock);
432         } else {
433                 /* gain it for accessing a cpu ring buffer. */
434
435                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
436                 down_read(&all_cpu_access_lock);
437
438                 /* Secondly block other access to this @cpu ring buffer. */
439                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
440         }
441 }
442
443 static inline void trace_access_unlock(int cpu)
444 {
445         if (cpu == RING_BUFFER_ALL_CPUS) {
446                 up_write(&all_cpu_access_lock);
447         } else {
448                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
449                 up_read(&all_cpu_access_lock);
450         }
451 }
452
453 static inline void trace_access_lock_init(void)
454 {
455         int cpu;
456
457         for_each_possible_cpu(cpu)
458                 mutex_init(&per_cpu(cpu_access_lock, cpu));
459 }
460
461 #else
462
463 static DEFINE_MUTEX(access_lock);
464
465 static inline void trace_access_lock(int cpu)
466 {
467         (void)cpu;
468         mutex_lock(&access_lock);
469 }
470
471 static inline void trace_access_unlock(int cpu)
472 {
473         (void)cpu;
474         mutex_unlock(&access_lock);
475 }
476
477 static inline void trace_access_lock_init(void)
478 {
479 }
480
481 #endif
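
/*
 * Illustrative usage sketch (not part of the original source): a reader
 * consuming events from one CPU buffer, or from all of them, brackets the
 * access like this:
 *
 *	trace_access_lock(cpu);		(RING_BUFFER_ALL_CPUS for all CPUs)
 *	... peek at or consume events of that cpu ...
 *	trace_access_unlock(cpu);
 *
 * On SMP this takes all_cpu_access_lock for reading plus the per-cpu
 * mutex, so readers of different CPUs may run concurrently while a
 * RING_BUFFER_ALL_CPUS reader excludes them all.
 */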
482
483 #ifdef CONFIG_STACKTRACE
484 static void __ftrace_trace_stack(struct ring_buffer *buffer,
485                                  unsigned long flags,
486                                  int skip, int pc, struct pt_regs *regs);
487 static inline void ftrace_trace_stack(struct trace_array *tr,
488                                       struct ring_buffer *buffer,
489                                       unsigned long flags,
490                                       int skip, int pc, struct pt_regs *regs);
491
492 #else
493 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
494                                         unsigned long flags,
495                                         int skip, int pc, struct pt_regs *regs)
496 {
497 }
498 static inline void ftrace_trace_stack(struct trace_array *tr,
499                                       struct ring_buffer *buffer,
500                                       unsigned long flags,
501                                       int skip, int pc, struct pt_regs *regs)
502 {
503 }
504
505 #endif
506
507 static void tracer_tracing_on(struct trace_array *tr)
508 {
509         if (tr->trace_buffer.buffer)
510                 ring_buffer_record_on(tr->trace_buffer.buffer);
511         /*
512          * This flag is looked at when buffers haven't been allocated
513          * yet, or by some tracers (like irqsoff), that just want to
514          * know if the ring buffer has been disabled, but it can handle
515          * races where it gets disabled but we still do a record.
516          * As the check is in the fast path of the tracers, it is more
517          * important to be fast than accurate.
518          */
519         tr->buffer_disabled = 0;
520         /* Make the flag seen by readers */
521         smp_wmb();
522 }
523
524 /**
525  * tracing_on - enable tracing buffers
526  *
527  * This function enables tracing buffers that may have been
528  * disabled with tracing_off.
529  */
530 void tracing_on(void)
531 {
532         tracer_tracing_on(&global_trace);
533 }
534 EXPORT_SYMBOL_GPL(tracing_on);
535
536 /**
537  * __trace_puts - write a constant string into the trace buffer.
538  * @ip:    The address of the caller
539  * @str:   The constant string to write
540  * @size:  The size of the string.
541  */
542 int __trace_puts(unsigned long ip, const char *str, int size)
543 {
544         struct ring_buffer_event *event;
545         struct ring_buffer *buffer;
546         struct print_entry *entry;
547         unsigned long irq_flags;
548         int alloc;
549         int pc;
550
551         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
552                 return 0;
553
554         pc = preempt_count();
555
556         if (unlikely(tracing_selftest_running || tracing_disabled))
557                 return 0;
558
559         alloc = sizeof(*entry) + size + 2; /* possible \n added */
560
561         local_save_flags(irq_flags);
562         buffer = global_trace.trace_buffer.buffer;
563         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
564                                           irq_flags, pc);
565         if (!event)
566                 return 0;
567
568         entry = ring_buffer_event_data(event);
569         entry->ip = ip;
570
571         memcpy(&entry->buf, str, size);
572
573         /* Add a newline if necessary */
574         if (entry->buf[size - 1] != '\n') {
575                 entry->buf[size] = '\n';
576                 entry->buf[size + 1] = '\0';
577         } else
578                 entry->buf[size] = '\0';
579
580         __buffer_unlock_commit(buffer, event);
581         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
582
583         return size;
584 }
585 EXPORT_SYMBOL_GPL(__trace_puts);
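
/*
 * Illustrative note (not part of the original source): callers normally
 * reach __trace_puts() through the trace_puts() macro, which picks either
 * __trace_bputs() or __trace_puts() depending on whether the string is a
 * literal, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The return value is positive (the number of bytes recorded) on success,
 * or 0 if nothing was written.
 */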
586
587 /**
588  * __trace_bputs - write the pointer to a constant string into trace buffer
589  * @ip:    The address of the caller
590  * @str:   The constant string to write to the buffer to
591  */
592 int __trace_bputs(unsigned long ip, const char *str)
593 {
594         struct ring_buffer_event *event;
595         struct ring_buffer *buffer;
596         struct bputs_entry *entry;
597         unsigned long irq_flags;
598         int size = sizeof(struct bputs_entry);
599         int pc;
600
601         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
602                 return 0;
603
604         pc = preempt_count();
605
606         if (unlikely(tracing_selftest_running || tracing_disabled))
607                 return 0;
608
609         local_save_flags(irq_flags);
610         buffer = global_trace.trace_buffer.buffer;
611         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
612                                           irq_flags, pc);
613         if (!event)
614                 return 0;
615
616         entry = ring_buffer_event_data(event);
617         entry->ip                       = ip;
618         entry->str                      = str;
619
620         __buffer_unlock_commit(buffer, event);
621         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
622
623         return 1;
624 }
625 EXPORT_SYMBOL_GPL(__trace_bputs);
626
627 #ifdef CONFIG_TRACER_SNAPSHOT
628 /**
629  * tracing_snapshot - take a snapshot of the current buffer.
630  *
631  * This causes a swap between the snapshot buffer and the current live
632  * tracing buffer. You can use this to take snapshots of the live
633  * trace when some condition is triggered, but continue to trace.
634  *
635  * Note, make sure to allocate the snapshot either with
636  * tracing_snapshot_alloc(), or by doing it manually
637  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
638  *
639  * If the snapshot buffer is not allocated, this will stop tracing,
640  * basically making a permanent snapshot.
641  */
642 void tracing_snapshot(void)
643 {
644         struct trace_array *tr = &global_trace;
645         struct tracer *tracer = tr->current_trace;
646         unsigned long flags;
647
648         if (in_nmi()) {
649                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
650                 internal_trace_puts("*** snapshot is being ignored        ***\n");
651                 return;
652         }
653
654         if (!tr->allocated_snapshot) {
655                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
656                 internal_trace_puts("*** stopping trace here!   ***\n");
657                 tracing_off();
658                 return;
659         }
660
661         /* Note, snapshot can not be used when the tracer uses it */
662         if (tracer->use_max_tr) {
663                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
664                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
665                 return;
666         }
667
668         local_irq_save(flags);
669         update_max_tr(tr, current, smp_processor_id());
670         local_irq_restore(flags);
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot);
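
/*
 * Illustrative usage sketch (not part of the original source; "cond" is a
 * placeholder): a debugging hack that wants to freeze a copy of the trace
 * when some condition is hit, while letting tracing continue, could do:
 *
 *	if (unlikely(cond))
 *		tracing_snapshot();
 *
 * provided the snapshot buffer was allocated beforehand, either with
 * tracing_snapshot_alloc() or via
 * "echo 1 > /sys/kernel/debug/tracing/snapshot".
 */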
673
674 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
675                                         struct trace_buffer *size_buf, int cpu_id);
676 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
677
678 static int alloc_snapshot(struct trace_array *tr)
679 {
680         int ret;
681
682         if (!tr->allocated_snapshot) {
683
684                 /* allocate spare buffer */
685                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
686                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
687                 if (ret < 0)
688                         return ret;
689
690                 tr->allocated_snapshot = true;
691         }
692
693         return 0;
694 }
695
696 static void free_snapshot(struct trace_array *tr)
697 {
698         /*
699          * We don't free the ring buffer. Instead, we resize it because
700          * the max_tr ring buffer has some state (e.g. ring->clock) and
701          * we want to preserve it.
702          */
703         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
704         set_buffer_entries(&tr->max_buffer, 1);
705         tracing_reset_online_cpus(&tr->max_buffer);
706         tr->allocated_snapshot = false;
707 }
708
709 /**
710  * tracing_alloc_snapshot - allocate snapshot buffer.
711  *
712  * This only allocates the snapshot buffer if it isn't already
713  * allocated - it doesn't also take a snapshot.
714  *
715  * This is meant to be used in cases where the snapshot buffer needs
716  * to be set up for events that can't sleep but need to be able to
717  * trigger a snapshot.
718  */
719 int tracing_alloc_snapshot(void)
720 {
721         struct trace_array *tr = &global_trace;
722         int ret;
723
724         ret = alloc_snapshot(tr);
725         WARN_ON(ret < 0);
726
727         return ret;
728 }
729 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
730
731 /**
732  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
733  *
734  * This is similar to tracing_snapshot(), but it will allocate the
735  * snapshot buffer if it isn't already allocated. Use this only
736  * where it is safe to sleep, as the allocation may sleep.
737  *
738  * This causes a swap between the snapshot buffer and the current live
739  * tracing buffer. You can use this to take snapshots of the live
740  * trace when some condition is triggered, but continue to trace.
741  */
742 void tracing_snapshot_alloc(void)
743 {
744         int ret;
745
746         ret = tracing_alloc_snapshot();
747         if (ret < 0)
748                 return;
749
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #else
754 void tracing_snapshot(void)
755 {
756         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
757 }
758 EXPORT_SYMBOL_GPL(tracing_snapshot);
759 int tracing_alloc_snapshot(void)
760 {
761         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
762         return -ENODEV;
763 }
764 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
765 void tracing_snapshot_alloc(void)
766 {
767         /* Give warning */
768         tracing_snapshot();
769 }
770 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
771 #endif /* CONFIG_TRACER_SNAPSHOT */
772
773 static void tracer_tracing_off(struct trace_array *tr)
774 {
775         if (tr->trace_buffer.buffer)
776                 ring_buffer_record_off(tr->trace_buffer.buffer);
777         /*
778          * This flag is looked at when buffers haven't been allocated
779          * yet, or by some tracers (like irqsoff), that just want to
780          * know if the ring buffer has been disabled, but it can handle
781          * races where it gets disabled but we still do a record.
782          * As the check is in the fast path of the tracers, it is more
783          * important to be fast than accurate.
784          */
785         tr->buffer_disabled = 1;
786         /* Make the flag seen by readers */
787         smp_wmb();
788 }
789
790 /**
791  * tracing_off - turn off tracing buffers
792  *
793  * This function stops the tracing buffers from recording data.
794  * It does not disable any overhead the tracers themselves may
795  * be causing. This function simply causes all recording to
796  * the ring buffers to fail.
797  */
798 void tracing_off(void)
799 {
800         tracer_tracing_off(&global_trace);
801 }
802 EXPORT_SYMBOL_GPL(tracing_off);
803
804 void disable_trace_on_warning(void)
805 {
806         if (__disable_trace_on_warning)
807                 tracing_off();
808 }
809
810 /**
811  * tracer_tracing_is_on - show the real state of the ring buffer
812  * @tr : the trace array whose ring buffer state is being queried
813  *
814  * Shows the real state of the ring buffer: whether it is enabled or not.
815  */
816 static int tracer_tracing_is_on(struct trace_array *tr)
817 {
818         if (tr->trace_buffer.buffer)
819                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
820         return !tr->buffer_disabled;
821 }
822
823 /**
824  * tracing_is_on - show state of ring buffers enabled
825  */
826 int tracing_is_on(void)
827 {
828         return tracer_tracing_is_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_is_on);
831
832 static int __init set_buf_size(char *str)
833 {
834         unsigned long buf_size;
835
836         if (!str)
837                 return 0;
838         buf_size = memparse(str, &str);
839         /* nr_entries can not be zero */
840         if (buf_size == 0)
841                 return 0;
842         trace_buf_size = buf_size;
843         return 1;
844 }
845 __setup("trace_buf_size=", set_buf_size);
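
/*
 * Example (illustrative only): since set_buf_size() uses memparse(), the
 * usual size suffixes work on the command line, e.g.
 *
 *	trace_buf_size=1048576
 *	trace_buf_size=1M
 *
 * both request a 1MiB buffer (rounded to page size, as noted above).
 */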
846
847 static int __init set_tracing_thresh(char *str)
848 {
849         unsigned long threshold;
850         int ret;
851
852         if (!str)
853                 return 0;
854         ret = kstrtoul(str, 0, &threshold);
855         if (ret < 0)
856                 return 0;
857         tracing_thresh = threshold * 1000;
858         return 1;
859 }
860 __setup("tracing_thresh=", set_tracing_thresh);
861
862 unsigned long nsecs_to_usecs(unsigned long nsecs)
863 {
864         return nsecs / 1000;
865 }
866
867 /*
868  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
869  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
870  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
871  * of strings in the order that the enums were defined.
872  */
873 #undef C
874 #define C(a, b) b
875
876 /* These must match the bit positions in trace_iterator_flags */
877 static const char *trace_options[] = {
878         TRACE_FLAGS
879         NULL
880 };
881
882 static struct {
883         u64 (*func)(void);
884         const char *name;
885         int in_ns;              /* is this clock in nanoseconds? */
886 } trace_clocks[] = {
887         { trace_clock_local,            "local",        1 },
888         { trace_clock_global,           "global",       1 },
889         { trace_clock_counter,          "counter",      0 },
890         { trace_clock_jiffies,          "uptime",       0 },
891         { trace_clock,                  "perf",         1 },
892         { ktime_get_mono_fast_ns,       "mono",         1 },
893         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
894         ARCH_TRACE_CLOCKS
895 };
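
/*
 * Illustrative note (not part of the original source): the "name" column
 * above is what appears in the trace_clock file, so the clock can be
 * switched at run time with, e.g.:
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * The in_ns field records whether the clock counts in nanoseconds, which
 * is used elsewhere when formatting timestamps for output.
 */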
896
897 /*
898  * trace_parser_get_init - gets the buffer for trace parser
899  */
900 int trace_parser_get_init(struct trace_parser *parser, int size)
901 {
902         memset(parser, 0, sizeof(*parser));
903
904         parser->buffer = kmalloc(size, GFP_KERNEL);
905         if (!parser->buffer)
906                 return 1;
907
908         parser->size = size;
909         return 0;
910 }
911
912 /*
913  * trace_parser_put - frees the buffer for trace parser
914  */
915 void trace_parser_put(struct trace_parser *parser)
916 {
917         kfree(parser->buffer);
918 }
919
920 /*
921  * trace_get_user - reads the user input string separated by space
922  * (matched by isspace(ch))
923  *
924  * For each string found the 'struct trace_parser' is updated,
925  * and the function returns.
926  *
927  * Returns number of bytes read.
928  *
929  * See kernel/trace/trace.h for 'struct trace_parser' details.
930  */
931 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
932         size_t cnt, loff_t *ppos)
933 {
934         char ch;
935         size_t read = 0;
936         ssize_t ret;
937
938         if (!*ppos)
939                 trace_parser_clear(parser);
940
941         ret = get_user(ch, ubuf++);
942         if (ret)
943                 goto out;
944
945         read++;
946         cnt--;
947
948         /*
949          * If the parser is not finished with the last write,
950          * continue reading the user input without skipping spaces.
951          */
952         if (!parser->cont) {
953                 /* skip white space */
954                 while (cnt && isspace(ch)) {
955                         ret = get_user(ch, ubuf++);
956                         if (ret)
957                                 goto out;
958                         read++;
959                         cnt--;
960                 }
961
962                 /* only spaces were written */
963                 if (isspace(ch)) {
964                         *ppos += read;
965                         ret = read;
966                         goto out;
967                 }
968
969                 parser->idx = 0;
970         }
971
972         /* read the non-space input */
973         while (cnt && !isspace(ch)) {
974                 if (parser->idx < parser->size - 1)
975                         parser->buffer[parser->idx++] = ch;
976                 else {
977                         ret = -EINVAL;
978                         goto out;
979                 }
980                 ret = get_user(ch, ubuf++);
981                 if (ret)
982                         goto out;
983                 read++;
984                 cnt--;
985         }
986
987         /* We either got finished input or we have to wait for another call. */
988         if (isspace(ch)) {
989                 parser->buffer[parser->idx] = 0;
990                 parser->cont = false;
991         } else if (parser->idx < parser->size - 1) {
992                 parser->cont = true;
993                 parser->buffer[parser->idx++] = ch;
994         } else {
995                 ret = -EINVAL;
996                 goto out;
997         }
998
999         *ppos += read;
1000         ret = read;
1001
1002 out:
1003         return ret;
1004 }
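
/*
 * Illustrative example (not part of the original source): if user space
 * writes "func1 func2" to a file that uses this parser, the first call
 * fills parser->buffer with "func1" and clears parser->cont; the caller
 * is invoked again for the remaining input and gets "func2". If a write
 * ends in the middle of a word, e.g. "fun", parser->cont is set so the
 * next write keeps appending to the same buffer.
 */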
1005
1006 /* TODO add a seq_buf_to_buffer() */
1007 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1008 {
1009         int len;
1010
1011         if (trace_seq_used(s) <= s->seq.readpos)
1012                 return -EBUSY;
1013
1014         len = trace_seq_used(s) - s->seq.readpos;
1015         if (cnt > len)
1016                 cnt = len;
1017         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1018
1019         s->seq.readpos += cnt;
1020         return cnt;
1021 }
1022
1023 unsigned long __read_mostly     tracing_thresh;
1024
1025 #ifdef CONFIG_TRACER_MAX_TRACE
1026 /*
1027  * Copy the new maximum trace into the separate maximum-trace
1028  * structure. (this way the maximum trace is permanently saved,
1029  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1030  */
1031 static void
1032 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1033 {
1034         struct trace_buffer *trace_buf = &tr->trace_buffer;
1035         struct trace_buffer *max_buf = &tr->max_buffer;
1036         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1037         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1038
1039         max_buf->cpu = cpu;
1040         max_buf->time_start = data->preempt_timestamp;
1041
1042         max_data->saved_latency = tr->max_latency;
1043         max_data->critical_start = data->critical_start;
1044         max_data->critical_end = data->critical_end;
1045
1046         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1047         max_data->pid = tsk->pid;
1048         /*
1049          * If tsk == current, then use current_uid(), as that does not use
1050          * RCU. The irq tracer can be called out of RCU scope.
1051          */
1052         if (tsk == current)
1053                 max_data->uid = current_uid();
1054         else
1055                 max_data->uid = task_uid(tsk);
1056
1057         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1058         max_data->policy = tsk->policy;
1059         max_data->rt_priority = tsk->rt_priority;
1060
1061         /* record this task's comm */
1062         tracing_record_cmdline(tsk);
1063 }
1064
1065 /**
1066  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1067  * @tr: tracer
1068  * @tsk: the task with the latency
1069  * @cpu: The cpu that initiated the trace.
1070  *
1071  * Flip the buffers between the @tr and the max_tr and record information
1072  * about which task was the cause of this latency.
1073  */
1074 void
1075 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1076 {
1077         struct ring_buffer *buf;
1078
1079         if (tr->stop_count)
1080                 return;
1081
1082         WARN_ON_ONCE(!irqs_disabled());
1083
1084         if (!tr->allocated_snapshot) {
1085                 /* Only the nop tracer should hit this when disabling */
1086                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1087                 return;
1088         }
1089
1090         arch_spin_lock(&tr->max_lock);
1091
1092         /* Inherit the recordable setting from trace_buffer */
1093         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1094                 ring_buffer_record_on(tr->max_buffer.buffer);
1095         else
1096                 ring_buffer_record_off(tr->max_buffer.buffer);
1097
1098         buf = tr->trace_buffer.buffer;
1099         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1100         tr->max_buffer.buffer = buf;
1101
1102         __update_max_tr(tr, tsk, cpu);
1103         arch_spin_unlock(&tr->max_lock);
1104 }
1105
1106 /**
1107  * update_max_tr_single - only copy one trace over, and reset the rest
1108  * @tr: tracer
1109  * @tsk: the task with the latency
1110  * @cpu: the cpu of the buffer to copy.
1111  *
1112  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1113  */
1114 void
1115 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1116 {
1117         int ret;
1118
1119         if (tr->stop_count)
1120                 return;
1121
1122         WARN_ON_ONCE(!irqs_disabled());
1123         if (!tr->allocated_snapshot) {
1124                 /* Only the nop tracer should hit this when disabling */
1125                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1126                 return;
1127         }
1128
1129         arch_spin_lock(&tr->max_lock);
1130
1131         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1132
1133         if (ret == -EBUSY) {
1134                 /*
1135                  * We failed to swap the buffer due to a commit taking
1136                  * place on this CPU. We fail to record, but we reset
1137                  * the max trace buffer (no one writes directly to it)
1138                  * and flag that it failed.
1139                  */
1140                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1141                         "Failed to swap buffers due to commit in progress\n");
1142         }
1143
1144         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1145
1146         __update_max_tr(tr, tsk, cpu);
1147         arch_spin_unlock(&tr->max_lock);
1148 }
1149 #endif /* CONFIG_TRACER_MAX_TRACE */
1150
1151 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1152 {
1153         /* Iterators are static; they should either be filled or empty */
1154         if (trace_buffer_iter(iter, iter->cpu_file))
1155                 return 0;
1156
1157         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1158                                 full);
1159 }
1160
1161 #ifdef CONFIG_FTRACE_STARTUP_TEST
1162 static int run_tracer_selftest(struct tracer *type)
1163 {
1164         struct trace_array *tr = &global_trace;
1165         struct tracer *saved_tracer = tr->current_trace;
1166         int ret;
1167
1168         if (!type->selftest || tracing_selftest_disabled)
1169                 return 0;
1170
1171         /*
1172          * Run a selftest on this tracer.
1173          * Here we reset the trace buffer, and set the current
1174          * tracer to be this tracer. The tracer can then run some
1175          * internal tracing to verify that everything is in order.
1176          * If we fail, we do not register this tracer.
1177          */
1178         tracing_reset_online_cpus(&tr->trace_buffer);
1179
1180         tr->current_trace = type;
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 /* If we expanded the buffers, make sure the max is expanded too */
1185                 if (ring_buffer_expanded)
1186                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1187                                            RING_BUFFER_ALL_CPUS);
1188                 tr->allocated_snapshot = true;
1189         }
1190 #endif
1191
1192         /* the test is responsible for initializing and enabling */
1193         pr_info("Testing tracer %s: ", type->name);
1194         ret = type->selftest(type, tr);
1195         /* the test is responsible for resetting too */
1196         tr->current_trace = saved_tracer;
1197         if (ret) {
1198                 printk(KERN_CONT "FAILED!\n");
1199                 /* Add the warning after printing 'FAILED' */
1200                 WARN_ON(1);
1201                 return -1;
1202         }
1203         /* Only reset on passing, to avoid touching corrupted buffers */
1204         tracing_reset_online_cpus(&tr->trace_buffer);
1205
1206 #ifdef CONFIG_TRACER_MAX_TRACE
1207         if (type->use_max_tr) {
1208                 tr->allocated_snapshot = false;
1209
1210                 /* Shrink the max buffer again */
1211                 if (ring_buffer_expanded)
1212                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1213                                            RING_BUFFER_ALL_CPUS);
1214         }
1215 #endif
1216
1217         printk(KERN_CONT "PASSED\n");
1218         return 0;
1219 }
1220 #else
1221 static inline int run_tracer_selftest(struct tracer *type)
1222 {
1223         return 0;
1224 }
1225 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1226
1227 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1228
1229 static void __init apply_trace_boot_options(void);
1230
1231 /**
1232  * register_tracer - register a tracer with the ftrace system.
1233  * @type - the plugin for the tracer
1234  *
1235  * Register a new plugin tracer.
1236  */
1237 int __init register_tracer(struct tracer *type)
1238 {
1239         struct tracer *t;
1240         int ret = 0;
1241
1242         if (!type->name) {
1243                 pr_info("Tracer must have a name\n");
1244                 return -1;
1245         }
1246
1247         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1248                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1249                 return -1;
1250         }
1251
1252         mutex_lock(&trace_types_lock);
1253
1254         tracing_selftest_running = true;
1255
1256         for (t = trace_types; t; t = t->next) {
1257                 if (strcmp(type->name, t->name) == 0) {
1258                         /* already found */
1259                         pr_info("Tracer %s already registered\n",
1260                                 type->name);
1261                         ret = -1;
1262                         goto out;
1263                 }
1264         }
1265
1266         if (!type->set_flag)
1267                 type->set_flag = &dummy_set_flag;
1268         if (!type->flags)
1269                 type->flags = &dummy_tracer_flags;
1270         else
1271                 if (!type->flags->opts)
1272                         type->flags->opts = dummy_tracer_opt;
1273
1274         ret = run_tracer_selftest(type);
1275         if (ret < 0)
1276                 goto out;
1277
1278         type->next = trace_types;
1279         trace_types = type;
1280         add_tracer_options(&global_trace, type);
1281
1282  out:
1283         tracing_selftest_running = false;
1284         mutex_unlock(&trace_types_lock);
1285
1286         if (ret || !default_bootup_tracer)
1287                 goto out_unlock;
1288
1289         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1290                 goto out_unlock;
1291
1292         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1293         /* Do we want this tracer to start on bootup? */
1294         tracing_set_tracer(&global_trace, type->name);
1295         default_bootup_tracer = NULL;
1296
1297         apply_trace_boot_options();
1298
1299         /* disable other selftests, since this will break it. */
1300         tracing_selftest_disabled = true;
1301 #ifdef CONFIG_FTRACE_STARTUP_TEST
1302         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1303                type->name);
1304 #endif
1305
1306  out_unlock:
1307         return ret;
1308 }
1309
1310 void tracing_reset(struct trace_buffer *buf, int cpu)
1311 {
1312         struct ring_buffer *buffer = buf->buffer;
1313
1314         if (!buffer)
1315                 return;
1316
1317         ring_buffer_record_disable(buffer);
1318
1319         /* Make sure all commits have finished */
1320         synchronize_sched();
1321         ring_buffer_reset_cpu(buffer, cpu);
1322
1323         ring_buffer_record_enable(buffer);
1324 }
1325
1326 void tracing_reset_online_cpus(struct trace_buffer *buf)
1327 {
1328         struct ring_buffer *buffer = buf->buffer;
1329         int cpu;
1330
1331         if (!buffer)
1332                 return;
1333
1334         ring_buffer_record_disable(buffer);
1335
1336         /* Make sure all commits have finished */
1337         synchronize_sched();
1338
1339         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1340
1341         for_each_online_cpu(cpu)
1342                 ring_buffer_reset_cpu(buffer, cpu);
1343
1344         ring_buffer_record_enable(buffer);
1345 }
1346
1347 /* Must have trace_types_lock held */
1348 void tracing_reset_all_online_cpus(void)
1349 {
1350         struct trace_array *tr;
1351
1352         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1353                 tracing_reset_online_cpus(&tr->trace_buffer);
1354 #ifdef CONFIG_TRACER_MAX_TRACE
1355                 tracing_reset_online_cpus(&tr->max_buffer);
1356 #endif
1357         }
1358 }
1359
1360 #define SAVED_CMDLINES_DEFAULT 128
1361 #define NO_CMDLINE_MAP UINT_MAX
1362 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1363 struct saved_cmdlines_buffer {
1364         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1365         unsigned *map_cmdline_to_pid;
1366         unsigned cmdline_num;
1367         int cmdline_idx;
1368         char *saved_cmdlines;
1369 };
1370 static struct saved_cmdlines_buffer *savedcmd;
1371
1372 static inline char *get_saved_cmdlines(int idx)
1373 {
1374         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1375 }
1376
1377 static inline void set_cmdline(int idx, const char *cmdline)
1378 {
1379         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1380 }
1381
1382 static int allocate_cmdlines_buffer(unsigned int val,
1383                                     struct saved_cmdlines_buffer *s)
1384 {
1385         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1386                                         GFP_KERNEL);
1387         if (!s->map_cmdline_to_pid)
1388                 return -ENOMEM;
1389
1390         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1391         if (!s->saved_cmdlines) {
1392                 kfree(s->map_cmdline_to_pid);
1393                 return -ENOMEM;
1394         }
1395
1396         s->cmdline_idx = 0;
1397         s->cmdline_num = val;
1398         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1399                sizeof(s->map_pid_to_cmdline));
1400         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1401                val * sizeof(*s->map_cmdline_to_pid));
1402
1403         return 0;
1404 }
1405
1406 static int trace_create_savedcmd(void)
1407 {
1408         int ret;
1409
1410         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1411         if (!savedcmd)
1412                 return -ENOMEM;
1413
1414         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1415         if (ret < 0) {
1416                 kfree(savedcmd);
1417                 savedcmd = NULL;
1418                 return -ENOMEM;
1419         }
1420
1421         return 0;
1422 }
1423
1424 int is_tracing_stopped(void)
1425 {
1426         return global_trace.stop_count;
1427 }
1428
1429 /**
1430  * tracing_start - quick start of the tracer
1431  *
1432  * If tracing is enabled but was stopped by tracing_stop,
1433  * this will start the tracer back up.
1434  */
1435 void tracing_start(void)
1436 {
1437         struct ring_buffer *buffer;
1438         unsigned long flags;
1439
1440         if (tracing_disabled)
1441                 return;
1442
1443         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1444         if (--global_trace.stop_count) {
1445                 if (global_trace.stop_count < 0) {
1446                         /* Someone screwed up their debugging */
1447                         WARN_ON_ONCE(1);
1448                         global_trace.stop_count = 0;
1449                 }
1450                 goto out;
1451         }
1452
1453         /* Prevent the buffers from switching */
1454         arch_spin_lock(&global_trace.max_lock);
1455
1456         buffer = global_trace.trace_buffer.buffer;
1457         if (buffer)
1458                 ring_buffer_record_enable(buffer);
1459
1460 #ifdef CONFIG_TRACER_MAX_TRACE
1461         buffer = global_trace.max_buffer.buffer;
1462         if (buffer)
1463                 ring_buffer_record_enable(buffer);
1464 #endif
1465
1466         arch_spin_unlock(&global_trace.max_lock);
1467
1468  out:
1469         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1470 }
1471
1472 static void tracing_start_tr(struct trace_array *tr)
1473 {
1474         struct ring_buffer *buffer;
1475         unsigned long flags;
1476
1477         if (tracing_disabled)
1478                 return;
1479
1480         /* If global, we need to also start the max tracer */
1481         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1482                 return tracing_start();
1483
1484         raw_spin_lock_irqsave(&tr->start_lock, flags);
1485
1486         if (--tr->stop_count) {
1487                 if (tr->stop_count < 0) {
1488                         /* Someone screwed up their debugging */
1489                         WARN_ON_ONCE(1);
1490                         tr->stop_count = 0;
1491                 }
1492                 goto out;
1493         }
1494
1495         buffer = tr->trace_buffer.buffer;
1496         if (buffer)
1497                 ring_buffer_record_enable(buffer);
1498
1499  out:
1500         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1501 }
1502
1503 /**
1504  * tracing_stop - quick stop of the tracer
1505  *
1506  * Light weight way to stop tracing. Use in conjunction with
1507  * tracing_start.
1508  */
1509 void tracing_stop(void)
1510 {
1511         struct ring_buffer *buffer;
1512         unsigned long flags;
1513
1514         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1515         if (global_trace.stop_count++)
1516                 goto out;
1517
1518         /* Prevent the buffers from switching */
1519         arch_spin_lock(&global_trace.max_lock);
1520
1521         buffer = global_trace.trace_buffer.buffer;
1522         if (buffer)
1523                 ring_buffer_record_disable(buffer);
1524
1525 #ifdef CONFIG_TRACER_MAX_TRACE
1526         buffer = global_trace.max_buffer.buffer;
1527         if (buffer)
1528                 ring_buffer_record_disable(buffer);
1529 #endif
1530
1531         arch_spin_unlock(&global_trace.max_lock);
1532
1533  out:
1534         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1535 }
1536
1537 static void tracing_stop_tr(struct trace_array *tr)
1538 {
1539         struct ring_buffer *buffer;
1540         unsigned long flags;
1541
1542         /* If global, we need to also stop the max tracer */
1543         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1544                 return tracing_stop();
1545
1546         raw_spin_lock_irqsave(&tr->start_lock, flags);
1547         if (tr->stop_count++)
1548                 goto out;
1549
1550         buffer = tr->trace_buffer.buffer;
1551         if (buffer)
1552                 ring_buffer_record_disable(buffer);
1553
1554  out:
1555         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1556 }
1557
1558 void trace_stop_cmdline_recording(void);
1559
1560 static int trace_save_cmdline(struct task_struct *tsk)
1561 {
1562         unsigned tpid, idx;
1563
1564         /* treat recording of idle task as a success */
1565         if (!tsk->pid)
1566                 return 1;
1567
1568         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1569
1570         /*
1571          * It's not the end of the world if we don't get
1572          * the lock, but we also don't want to spin
1573          * nor do we want to disable interrupts,
1574          * so if we miss here, then better luck next time.
1575          */
1576         if (!arch_spin_trylock(&trace_cmdline_lock))
1577                 return 0;
1578
1579         idx = savedcmd->map_pid_to_cmdline[tpid];
1580         if (idx == NO_CMDLINE_MAP) {
1581                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1582
1583                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1584                 savedcmd->cmdline_idx = idx;
1585         }
1586
1587         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1588         set_cmdline(idx, tsk->comm);
1589
1590         arch_spin_unlock(&trace_cmdline_lock);
1591
1592         return 1;
1593 }
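
/*
 * Illustrative sketch (not part of the original source): the maps above
 * form a small pid <-> comm cache. For a task with pid 4242 and comm
 * "bash", trace_save_cmdline() roughly does:
 *
 *	tpid = 4242 & (PID_MAX_DEFAULT - 1);
 *	idx  = map_pid_to_cmdline[tpid];	(pick the next slot if unset)
 *	map_cmdline_to_pid[idx] = 4242;
 *	set_cmdline(idx, tsk->comm);		(copies "bash" into saved_cmdlines[])
 *
 * __trace_find_cmdline() later reverses the lookup, falling back to
 * "<...>" when the slot has since been reused by a different pid.
 */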
1594
1595 static void __trace_find_cmdline(int pid, char comm[])
1596 {
1597         unsigned map;
1598         int tpid;
1599
1600         if (!pid) {
1601                 strcpy(comm, "<idle>");
1602                 return;
1603         }
1604
1605         if (WARN_ON_ONCE(pid < 0)) {
1606                 strcpy(comm, "<XXX>");
1607                 return;
1608         }
1609
1610         tpid = pid & (PID_MAX_DEFAULT - 1);
1611         map = savedcmd->map_pid_to_cmdline[tpid];
1612         if (map != NO_CMDLINE_MAP) {
1613                 tpid = savedcmd->map_cmdline_to_pid[map];
1614                 if (tpid == pid) {
1615                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1616                         return;
1617                 }
1618         }
1619         strcpy(comm, "<...>");
1620 }
1621
1622 void trace_find_cmdline(int pid, char comm[])
1623 {
1624         preempt_disable();
1625         arch_spin_lock(&trace_cmdline_lock);
1626
1627         __trace_find_cmdline(pid, comm);
1628
1629         arch_spin_unlock(&trace_cmdline_lock);
1630         preempt_enable();
1631 }
1632
1633 void tracing_record_cmdline(struct task_struct *tsk)
1634 {
1635         if (!__this_cpu_read(trace_cmdline_save))
1636                 return;
1637
1638         if (trace_save_cmdline(tsk))
1639                 __this_cpu_write(trace_cmdline_save, false);
1640 }
1641
1642 void
1643 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1644                              int pc)
1645 {
1646         struct task_struct *tsk = current;
1647
1648         entry->preempt_count            = pc & 0xff;
1649         entry->pid                      = (tsk) ? tsk->pid : 0;
1650         entry->flags =
1651 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1652                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1653 #else
1654                 TRACE_FLAG_IRQS_NOSUPPORT |
1655 #endif
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 struct ring_buffer_event *
1664 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1665                           int type,
1666                           unsigned long len,
1667                           unsigned long flags, int pc)
1668 {
1669         struct ring_buffer_event *event;
1670
1671         event = ring_buffer_lock_reserve(buffer, len);
1672         if (event != NULL) {
1673                 struct trace_entry *ent = ring_buffer_event_data(event);
1674
1675                 tracing_generic_entry_update(ent, flags, pc);
1676                 ent->type = type;
1677         }
1678
1679         return event;
1680 }
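
/*
 * Illustrative usage sketch (not part of the original source): the typical
 * pattern for writing an event with the helpers in this file is
 *
 *	event = trace_buffer_lock_reserve(buffer, type, len, flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	... fill in the type specific fields of *entry ...
 *	__buffer_unlock_commit(buffer, event);
 *
 * as __trace_puts() and __trace_bputs() above demonstrate.
 */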
1681
1682 void
1683 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1684 {
1685         __this_cpu_write(trace_cmdline_save, true);
1686         ring_buffer_unlock_commit(buffer, event);
1687 }
1688
1689 void trace_buffer_unlock_commit(struct trace_array *tr,
1690                                 struct ring_buffer *buffer,
1691                                 struct ring_buffer_event *event,
1692                                 unsigned long flags, int pc)
1693 {
1694         __buffer_unlock_commit(buffer, event);
1695
1696         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1697         ftrace_trace_userstack(tr, buffer, flags, pc);
1698 }
1699 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
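
/*
 * A minimal sketch of the reserve/fill/commit pattern used throughout
 * this file (trace_function() below is the canonical example):
 *
 *        event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *                                          flags, pc);
 *        if (!event)
 *                return;
 *        entry = ring_buffer_event_data(event);
 *        ... fill in the type specific fields ...
 *        if (!call_filter_check_discard(call, entry, buffer, event))
 *                __buffer_unlock_commit(buffer, event);
 *
 * call_filter_check_discard() throws the event away again if it does
 * not match the event filter; otherwise the commit makes it visible to
 * readers.
 */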
1700
1701 static struct ring_buffer *temp_buffer;
1702
1703 struct ring_buffer_event *
1704 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1705                           struct trace_event_file *trace_file,
1706                           int type, unsigned long len,
1707                           unsigned long flags, int pc)
1708 {
1709         struct ring_buffer_event *entry;
1710
1711         *current_rb = trace_file->tr->trace_buffer.buffer;
1712         entry = trace_buffer_lock_reserve(*current_rb,
1713                                          type, len, flags, pc);
1714         /*
1715          * If tracing is off, but we have triggers enabled
1716          * we still need to look at the event data. Use the temp_buffer
1717          * to store the trace event for the trigger to use. It's recursion
1718          * safe and will not be recorded anywhere.
1719          */
1720         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1721                 *current_rb = temp_buffer;
1722                 entry = trace_buffer_lock_reserve(*current_rb,
1723                                                   type, len, flags, pc);
1724         }
1725         return entry;
1726 }
1727 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
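
/*
 * Note: when the reservation above fails because tracing is off but the
 * file still has conditional triggers, the event is staged in the
 * temp_buffer instead so the trigger can inspect its fields; nothing
 * placed in temp_buffer is ever reported to readers.
 */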
1728
1729 struct ring_buffer_event *
1730 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1731                                   int type, unsigned long len,
1732                                   unsigned long flags, int pc)
1733 {
1734         *current_rb = global_trace.trace_buffer.buffer;
1735         return trace_buffer_lock_reserve(*current_rb,
1736                                          type, len, flags, pc);
1737 }
1738 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1739
1740 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1741                                      struct ring_buffer *buffer,
1742                                      struct ring_buffer_event *event,
1743                                      unsigned long flags, int pc,
1744                                      struct pt_regs *regs)
1745 {
1746         __buffer_unlock_commit(buffer, event);
1747
1748         /*
1749          * If regs is not set, then skip the following callers:
1750          *   trace_buffer_unlock_commit_regs
1751          *   event_trigger_unlock_commit
1752          *   trace_event_buffer_commit
1753          *   trace_event_raw_event_sched_switch
1754          * Note, we can still get here via blktrace, wakeup tracer
1755          * and mmiotrace, but that's ok if they lose a function or
1756          * two. They are not that meaningful.
1757          */
1758         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
1759         ftrace_trace_userstack(tr, buffer, flags, pc);
1760 }
1761 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1762
1763 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1764                                          struct ring_buffer_event *event)
1765 {
1766         ring_buffer_discard_commit(buffer, event);
1767 }
1768 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1769
1770 void
1771 trace_function(struct trace_array *tr,
1772                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1773                int pc)
1774 {
1775         struct trace_event_call *call = &event_function;
1776         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1777         struct ring_buffer_event *event;
1778         struct ftrace_entry *entry;
1779
1780         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1781                                           flags, pc);
1782         if (!event)
1783                 return;
1784         entry   = ring_buffer_event_data(event);
1785         entry->ip                       = ip;
1786         entry->parent_ip                = parent_ip;
1787
1788         if (!call_filter_check_discard(call, entry, buffer, event))
1789                 __buffer_unlock_commit(buffer, event);
1790 }
1791
1792 #ifdef CONFIG_STACKTRACE
1793
1794 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1795 struct ftrace_stack {
1796         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1797 };
1798
1799 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1800 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1801
1802 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1803                                  unsigned long flags,
1804                                  int skip, int pc, struct pt_regs *regs)
1805 {
1806         struct trace_event_call *call = &event_kernel_stack;
1807         struct ring_buffer_event *event;
1808         struct stack_entry *entry;
1809         struct stack_trace trace;
1810         int use_stack;
1811         int size = FTRACE_STACK_ENTRIES;
1812
1813         trace.nr_entries        = 0;
1814         trace.skip              = skip;
1815
1816         /*
1817          * Add two, for this function and the call to save_stack_trace()
1818          * If regs is set, then these functions will not be in the way.
1819          */
1820         if (!regs)
1821                 trace.skip += 2;
1822
1823         /*
1824          * Since events can happen in NMIs there's no safe way to
1825          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1826          * or NMI comes in, it will just have to save the stack directly
1827          * into the ring buffer event, limited to FTRACE_STACK_ENTRIES.
1828          */
1829         preempt_disable_notrace();
1830
1831         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1832         /*
1833          * We don't need any atomic variables, just a barrier.
1834          * If an interrupt comes in, we don't care, because it would
1835          * have exited and put the counter back to what we want.
1836          * We just need a barrier to keep gcc from moving things
1837          * around.
1838          */
1839         barrier();
1840         if (use_stack == 1) {
1841                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1842                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1843
1844                 if (regs)
1845                         save_stack_trace_regs(regs, &trace);
1846                 else
1847                         save_stack_trace(&trace);
1848
1849                 if (trace.nr_entries > size)
1850                         size = trace.nr_entries;
1851         } else
1852                 /* From now on, use_stack is a boolean */
1853                 use_stack = 0;
1854
1855         size *= sizeof(unsigned long);
1856
1857         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1858                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
1859                                     flags, pc);
1860         if (!event)
1861                 goto out;
1862         entry = ring_buffer_event_data(event);
1863
1864         memset(&entry->caller, 0, size);
1865
1866         if (use_stack)
1867                 memcpy(&entry->caller, trace.entries,
1868                        trace.nr_entries * sizeof(unsigned long));
1869         else {
1870                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1871                 trace.entries           = entry->caller;
1872                 if (regs)
1873                         save_stack_trace_regs(regs, &trace);
1874                 else
1875                         save_stack_trace(&trace);
1876         }
1877
1878         entry->size = trace.nr_entries;
1879
1880         if (!call_filter_check_discard(call, entry, buffer, event))
1881                 __buffer_unlock_commit(buffer, event);
1882
1883  out:
1884         /* Again, don't let gcc optimize things here */
1885         barrier();
1886         __this_cpu_dec(ftrace_stack_reserve);
1887         preempt_enable_notrace();
1888
1889 }
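
/*
 * The ftrace_stack_reserve counter is what makes the per-cpu
 * ftrace_stack safe without atomics: the outermost user on a cpu sees
 * the counter become 1 and may use the large per-cpu scratch stack,
 * while a nested interrupt or NMI sees a higher value and saves at most
 * FTRACE_STACK_ENTRIES directly into its own ring buffer event.
 */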
1890
1891 static inline void ftrace_trace_stack(struct trace_array *tr,
1892                                       struct ring_buffer *buffer,
1893                                       unsigned long flags,
1894                                       int skip, int pc, struct pt_regs *regs)
1895 {
1896         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1897                 return;
1898
1899         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1900 }
1901
1902 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1903                    int pc)
1904 {
1905         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1906 }
1907
1908 /**
1909  * trace_dump_stack - record a stack back trace in the trace buffer
1910  * @skip: Number of functions to skip (helper handlers)
1911  */
1912 void trace_dump_stack(int skip)
1913 {
1914         unsigned long flags;
1915
1916         if (tracing_disabled || tracing_selftest_running)
1917                 return;
1918
1919         local_save_flags(flags);
1920
1921         /*
1922          * Skip 3 more, seems to get us at the caller of
1923          * this function.
1924          */
1925         skip += 3;
1926         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1927                              flags, skip, preempt_count(), NULL);
1928 }
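
/*
 * Usage sketch (illustrative; "suspect_condition" is only a
 * placeholder):
 *
 *        if (suspect_condition)
 *                trace_dump_stack(0);
 *
 * records the caller's kernel stack into the global trace buffer, which
 * is usually easier to correlate with surrounding trace events than a
 * printk based dump_stack().
 */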
1929
1930 static DEFINE_PER_CPU(int, user_stack_count);
1931
1932 void
1933 ftrace_trace_userstack(struct trace_array *tr,
1934                        struct ring_buffer *buffer, unsigned long flags, int pc)
1935 {
1936         struct trace_event_call *call = &event_user_stack;
1937         struct ring_buffer_event *event;
1938         struct userstack_entry *entry;
1939         struct stack_trace trace;
1940
1941         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
1942                 return;
1943
1944         /*
1945          * NMIs can not handle page faults, even with fix ups.
1946          * The save user stack can (and often does) fault.
1947          */
1948         if (unlikely(in_nmi()))
1949                 return;
1950
1951         /*
1952          * prevent recursion, since the user stack tracing may
1953          * trigger other kernel events.
1954          */
1955         preempt_disable();
1956         if (__this_cpu_read(user_stack_count))
1957                 goto out;
1958
1959         __this_cpu_inc(user_stack_count);
1960
1961         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1962                                           sizeof(*entry), flags, pc);
1963         if (!event)
1964                 goto out_drop_count;
1965         entry   = ring_buffer_event_data(event);
1966
1967         entry->tgid             = current->tgid;
1968         memset(&entry->caller, 0, sizeof(entry->caller));
1969
1970         trace.nr_entries        = 0;
1971         trace.max_entries       = FTRACE_STACK_ENTRIES;
1972         trace.skip              = 0;
1973         trace.entries           = entry->caller;
1974
1975         save_stack_trace_user(&trace);
1976         if (!call_filter_check_discard(call, entry, buffer, event))
1977                 __buffer_unlock_commit(buffer, event);
1978
1979  out_drop_count:
1980         __this_cpu_dec(user_stack_count);
1981  out:
1982         preempt_enable();
1983 }
1984
1985 #ifdef UNUSED
1986 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1987 {
1988         ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
1989 }
1990 #endif /* UNUSED */
1991
1992 #endif /* CONFIG_STACKTRACE */
1993
1994 /* created for use with alloc_percpu */
1995 struct trace_buffer_struct {
1996         char buffer[TRACE_BUF_SIZE];
1997 };
1998
1999 static struct trace_buffer_struct *trace_percpu_buffer;
2000 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2001 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2002 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2003
2004 /*
2005  * The buffer used is dependent on the context. There is a per cpu
2006  * buffer for normal context, softirq contex, hard irq context and
2007  * for NMI context. Thise allows for lockless recording.
2008  *
2009  * Note, if the buffers failed to be allocated, then this returns NULL
2010  */
2011 static char *get_trace_buf(void)
2012 {
2013         struct trace_buffer_struct *percpu_buffer;
2014
2015         /*
2016          * If we have allocated per cpu buffers, then we do not
2017          * need to do any locking.
2018          */
2019         if (in_nmi())
2020                 percpu_buffer = trace_percpu_nmi_buffer;
2021         else if (in_irq())
2022                 percpu_buffer = trace_percpu_irq_buffer;
2023         else if (in_softirq())
2024                 percpu_buffer = trace_percpu_sirq_buffer;
2025         else
2026                 percpu_buffer = trace_percpu_buffer;
2027
2028         if (!percpu_buffer)
2029                 return NULL;
2030
2031         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2032 }
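
/*
 * Because the buffer is chosen by context, a trace_printk() that
 * interrupts another trace_printk() on the same cpu lands in a
 * different per-cpu buffer (irq or NMI instead of normal), which is why
 * no locking is needed between them.
 */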
2033
2034 static int alloc_percpu_trace_buffer(void)
2035 {
2036         struct trace_buffer_struct *buffers;
2037         struct trace_buffer_struct *sirq_buffers;
2038         struct trace_buffer_struct *irq_buffers;
2039         struct trace_buffer_struct *nmi_buffers;
2040
2041         buffers = alloc_percpu(struct trace_buffer_struct);
2042         if (!buffers)
2043                 goto err_warn;
2044
2045         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2046         if (!sirq_buffers)
2047                 goto err_sirq;
2048
2049         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2050         if (!irq_buffers)
2051                 goto err_irq;
2052
2053         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2054         if (!nmi_buffers)
2055                 goto err_nmi;
2056
2057         trace_percpu_buffer = buffers;
2058         trace_percpu_sirq_buffer = sirq_buffers;
2059         trace_percpu_irq_buffer = irq_buffers;
2060         trace_percpu_nmi_buffer = nmi_buffers;
2061
2062         return 0;
2063
2064  err_nmi:
2065         free_percpu(irq_buffers);
2066  err_irq:
2067         free_percpu(sirq_buffers);
2068  err_sirq:
2069         free_percpu(buffers);
2070  err_warn:
2071         WARN(1, "Could not allocate percpu trace_printk buffer");
2072         return -ENOMEM;
2073 }
2074
2075 static int buffers_allocated;
2076
2077 void trace_printk_init_buffers(void)
2078 {
2079         if (buffers_allocated)
2080                 return;
2081
2082         if (alloc_percpu_trace_buffer())
2083                 return;
2084
2085         /* trace_printk() is for debug use only. Don't use it in production. */
2086
2087         pr_warning("\n");
2088         pr_warning("**********************************************************\n");
2089         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2090         pr_warning("**                                                      **\n");
2091         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2092         pr_warning("**                                                      **\n");
2093         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2094         pr_warning("** unsafe for production use.                           **\n");
2095         pr_warning("**                                                      **\n");
2096         pr_warning("** If you see this message and you are not debugging    **\n");
2097         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2098         pr_warning("**                                                      **\n");
2099         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2100         pr_warning("**********************************************************\n");
2101
2102         /* Expand the buffers to set size */
2103         tracing_update_buffers();
2104
2105         buffers_allocated = 1;
2106
2107         /*
2108          * trace_printk_init_buffers() can be called by modules.
2109          * If that happens, then we need to start cmdline recording
2110          * directly here. If the global_trace.buffer is already
2111          * allocated here, then this was called by module code.
2112          */
2113         if (global_trace.trace_buffer.buffer)
2114                 tracing_start_cmdline_record();
2115 }
2116
2117 void trace_printk_start_comm(void)
2118 {
2119         /* Start tracing comms if trace printk is set */
2120         if (!buffers_allocated)
2121                 return;
2122         tracing_start_cmdline_record();
2123 }
2124
2125 static void trace_printk_start_stop_comm(int enabled)
2126 {
2127         if (!buffers_allocated)
2128                 return;
2129
2130         if (enabled)
2131                 tracing_start_cmdline_record();
2132         else
2133                 tracing_stop_cmdline_record();
2134 }
2135
2136 /**
2137  * trace_vbprintk - write binary msg to tracing buffer
 * @ip:   The address of the caller
 * @fmt:  The string format to write to the buffer
 * @args: Arguments for @fmt
2139  */
2140 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2141 {
2142         struct trace_event_call *call = &event_bprint;
2143         struct ring_buffer_event *event;
2144         struct ring_buffer *buffer;
2145         struct trace_array *tr = &global_trace;
2146         struct bprint_entry *entry;
2147         unsigned long flags;
2148         char *tbuffer;
2149         int len = 0, size, pc;
2150
2151         if (unlikely(tracing_selftest_running || tracing_disabled))
2152                 return 0;
2153
2154         /* Don't pollute graph traces with trace_vprintk internals */
2155         pause_graph_tracing();
2156
2157         pc = preempt_count();
2158         preempt_disable_notrace();
2159
2160         tbuffer = get_trace_buf();
2161         if (!tbuffer) {
2162                 len = 0;
2163                 goto out;
2164         }
2165
2166         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2167
2168         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2169                 goto out;
2170
2171         local_save_flags(flags);
2172         size = sizeof(*entry) + sizeof(u32) * len;
2173         buffer = tr->trace_buffer.buffer;
2174         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2175                                           flags, pc);
2176         if (!event)
2177                 goto out;
2178         entry = ring_buffer_event_data(event);
2179         entry->ip                       = ip;
2180         entry->fmt                      = fmt;
2181
2182         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2183         if (!call_filter_check_discard(call, entry, buffer, event)) {
2184                 __buffer_unlock_commit(buffer, event);
2185                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2186         }
2187
2188 out:
2189         preempt_enable_notrace();
2190         unpause_graph_tracing();
2191
2192         return len;
2193 }
2194 EXPORT_SYMBOL_GPL(trace_vbprintk);
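
/*
 * Illustrative note: trace_printk() calls with a compile-time constant
 * format string are normally routed here; only the binary arguments are
 * copied and the format pointer is recorded, e.g.:
 *
 *        trace_printk("queued %d requests on cpu %d\n", nr, cpu);
 *
 * The format is combined with the saved arguments when the buffer is
 * read, which keeps this fast path cheap.
 */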
2195
2196 __printf(3, 0)
2197 static int
2198 __trace_array_vprintk(struct ring_buffer *buffer,
2199                       unsigned long ip, const char *fmt, va_list args)
2200 {
2201         struct trace_event_call *call = &event_print;
2202         struct ring_buffer_event *event;
2203         int len = 0, size, pc;
2204         struct print_entry *entry;
2205         unsigned long flags;
2206         char *tbuffer;
2207
2208         if (tracing_disabled || tracing_selftest_running)
2209                 return 0;
2210
2211         /* Don't pollute graph traces with trace_vprintk internals */
2212         pause_graph_tracing();
2213
2214         pc = preempt_count();
2215         preempt_disable_notrace();
2216
2218         tbuffer = get_trace_buf();
2219         if (!tbuffer) {
2220                 len = 0;
2221                 goto out;
2222         }
2223
2224         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2225
2226         local_save_flags(flags);
2227         size = sizeof(*entry) + len + 1;
2228         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2229                                           flags, pc);
2230         if (!event)
2231                 goto out;
2232         entry = ring_buffer_event_data(event);
2233         entry->ip = ip;
2234
2235         memcpy(&entry->buf, tbuffer, len + 1);
2236         if (!call_filter_check_discard(call, entry, buffer, event)) {
2237                 __buffer_unlock_commit(buffer, event);
2238                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2239         }
2240  out:
2241         preempt_enable_notrace();
2242         unpause_graph_tracing();
2243
2244         return len;
2245 }
2246
2247 __printf(3, 0)
2248 int trace_array_vprintk(struct trace_array *tr,
2249                         unsigned long ip, const char *fmt, va_list args)
2250 {
2251         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2252 }
2253
2254 __printf(3, 0)
2255 int trace_array_printk(struct trace_array *tr,
2256                        unsigned long ip, const char *fmt, ...)
2257 {
2258         int ret;
2259         va_list ap;
2260
2261         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2262                 return 0;
2263
2264         if (!tr)
2265                 return -ENOENT;
2266
2267         va_start(ap, fmt);
2268         ret = trace_array_vprintk(tr, ip, fmt, ap);
2269         va_end(ap);
2270         return ret;
2271 }
2272
2273 __printf(3, 4)
2274 int trace_array_printk_buf(struct ring_buffer *buffer,
2275                            unsigned long ip, const char *fmt, ...)
2276 {
2277         int ret;
2278         va_list ap;
2279
2280         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2281                 return 0;
2282
2283         va_start(ap, fmt);
2284         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2285         va_end(ap);
2286         return ret;
2287 }
2288
2289 __printf(2, 0)
2290 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2291 {
2292         return trace_array_vprintk(&global_trace, ip, fmt, args);
2293 }
2294 EXPORT_SYMBOL_GPL(trace_vprintk);
2295
2296 static void trace_iterator_increment(struct trace_iterator *iter)
2297 {
2298         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2299
2300         iter->idx++;
2301         if (buf_iter)
2302                 ring_buffer_read(buf_iter, NULL);
2303 }
2304
2305 static struct trace_entry *
2306 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2307                 unsigned long *lost_events)
2308 {
2309         struct ring_buffer_event *event;
2310         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2311
2312         if (buf_iter)
2313                 event = ring_buffer_iter_peek(buf_iter, ts);
2314         else
2315                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2316                                          lost_events);
2317
2318         if (event) {
2319                 iter->ent_size = ring_buffer_event_length(event);
2320                 return ring_buffer_event_data(event);
2321         }
2322         iter->ent_size = 0;
2323         return NULL;
2324 }
2325
2326 static struct trace_entry *
2327 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2328                   unsigned long *missing_events, u64 *ent_ts)
2329 {
2330         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2331         struct trace_entry *ent, *next = NULL;
2332         unsigned long lost_events = 0, next_lost = 0;
2333         int cpu_file = iter->cpu_file;
2334         u64 next_ts = 0, ts;
2335         int next_cpu = -1;
2336         int next_size = 0;
2337         int cpu;
2338
2339         /*
2340          * If we are in a per_cpu trace file, don't bother iterating over
2341          * all cpus; just peek at that cpu directly.
2342          */
2343         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2344                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2345                         return NULL;
2346                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2347                 if (ent_cpu)
2348                         *ent_cpu = cpu_file;
2349
2350                 return ent;
2351         }
2352
2353         for_each_tracing_cpu(cpu) {
2354
2355                 if (ring_buffer_empty_cpu(buffer, cpu))
2356                         continue;
2357
2358                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2359
2360                 /*
2361                  * Pick the entry with the smallest timestamp:
2362                  */
2363                 if (ent && (!next || ts < next_ts)) {
2364                         next = ent;
2365                         next_cpu = cpu;
2366                         next_ts = ts;
2367                         next_lost = lost_events;
2368                         next_size = iter->ent_size;
2369                 }
2370         }
2371
2372         iter->ent_size = next_size;
2373
2374         if (ent_cpu)
2375                 *ent_cpu = next_cpu;
2376
2377         if (ent_ts)
2378                 *ent_ts = next_ts;
2379
2380         if (missing_events)
2381                 *missing_events = next_lost;
2382
2383         return next;
2384 }
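
/*
 * Example: if cpu 0's next event has ts 1005 and cpu 1's has ts 1002,
 * __find_next_entry() returns the cpu 1 entry (reporting cpu 1 and
 * ts 1002), so reading all cpus interleaves the per-cpu buffers in
 * global timestamp order.
 */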
2385
2386 /* Find the next real entry, without updating the iterator itself */
2387 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2388                                           int *ent_cpu, u64 *ent_ts)
2389 {
2390         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2391 }
2392
2393 /* Find the next real entry, and increment the iterator to the next entry */
2394 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2395 {
2396         iter->ent = __find_next_entry(iter, &iter->cpu,
2397                                       &iter->lost_events, &iter->ts);
2398
2399         if (iter->ent)
2400                 trace_iterator_increment(iter);
2401
2402         return iter->ent ? iter : NULL;
2403 }
2404
2405 static void trace_consume(struct trace_iterator *iter)
2406 {
2407         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2408                             &iter->lost_events);
2409 }
2410
2411 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2412 {
2413         struct trace_iterator *iter = m->private;
2414         int i = (int)*pos;
2415         void *ent;
2416
2417         WARN_ON_ONCE(iter->leftover);
2418
2419         (*pos)++;
2420
2421         /* can't go backwards */
2422         if (iter->idx > i)
2423                 return NULL;
2424
2425         if (iter->idx < 0)
2426                 ent = trace_find_next_entry_inc(iter);
2427         else
2428                 ent = iter;
2429
2430         while (ent && iter->idx < i)
2431                 ent = trace_find_next_entry_inc(iter);
2432
2433         iter->pos = *pos;
2434
2435         return ent;
2436 }
2437
2438 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2439 {
2440         struct ring_buffer_event *event;
2441         struct ring_buffer_iter *buf_iter;
2442         unsigned long entries = 0;
2443         u64 ts;
2444
2445         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2446
2447         buf_iter = trace_buffer_iter(iter, cpu);
2448         if (!buf_iter)
2449                 return;
2450
2451         ring_buffer_iter_reset(buf_iter);
2452
2453         /*
2454          * With the max latency tracers, a reset may never have taken
2455          * place on a cpu. This is evident when the timestamp is
2456          * before the start of the buffer.
2457          */
2458         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2459                 if (ts >= iter->trace_buffer->time_start)
2460                         break;
2461                 entries++;
2462                 ring_buffer_read(buf_iter, NULL);
2463         }
2464
2465         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2466 }
2467
2468 /*
2469  * The current tracer is copied to avoid taking a global lock
2470  * all around.
2471  */
2472 static void *s_start(struct seq_file *m, loff_t *pos)
2473 {
2474         struct trace_iterator *iter = m->private;
2475         struct trace_array *tr = iter->tr;
2476         int cpu_file = iter->cpu_file;
2477         void *p = NULL;
2478         loff_t l = 0;
2479         int cpu;
2480
2481         /*
2482          * copy the tracer to avoid using a global lock all around.
2483          * iter->trace is a copy of current_trace; the name pointer
2484          * may be compared instead of using strcmp(), as iter->trace->name
2485          * will point to the same string as current_trace->name.
2486          */
2487         mutex_lock(&trace_types_lock);
2488         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2489                 *iter->trace = *tr->current_trace;
2490         mutex_unlock(&trace_types_lock);
2491
2492 #ifdef CONFIG_TRACER_MAX_TRACE
2493         if (iter->snapshot && iter->trace->use_max_tr)
2494                 return ERR_PTR(-EBUSY);
2495 #endif
2496
2497         if (*pos != iter->pos) {
2498                 iter->ent = NULL;
2499                 iter->cpu = 0;
2500                 iter->idx = -1;
2501
2502                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2503                         for_each_tracing_cpu(cpu)
2504                                 tracing_iter_reset(iter, cpu);
2505                 } else
2506                         tracing_iter_reset(iter, cpu_file);
2507
2508                 iter->leftover = 0;
2509                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2510                         ;
2511
2512         } else {
2513                 /*
2514                  * If we overflowed the seq_file before, then we want
2515                  * to just reuse the trace_seq buffer again.
2516                  */
2517                 if (iter->leftover)
2518                         p = iter;
2519                 else {
2520                         l = *pos - 1;
2521                         p = s_next(m, p, &l);
2522                 }
2523         }
2524
2525         trace_event_read_lock();
2526         trace_access_lock(cpu_file);
2527         return p;
2528 }
2529
2530 static void s_stop(struct seq_file *m, void *p)
2531 {
2532         struct trace_iterator *iter = m->private;
2533
2534 #ifdef CONFIG_TRACER_MAX_TRACE
2535         if (iter->snapshot && iter->trace->use_max_tr)
2536                 return;
2537 #endif
2538
2539         trace_access_unlock(iter->cpu_file);
2540         trace_event_read_unlock();
2541 }
2542
2543 static void
2544 get_total_entries(struct trace_buffer *buf,
2545                   unsigned long *total, unsigned long *entries)
2546 {
2547         unsigned long count;
2548         int cpu;
2549
2550         *total = 0;
2551         *entries = 0;
2552
2553         for_each_tracing_cpu(cpu) {
2554                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2555                 /*
2556                  * If this buffer has skipped entries, then we hold all
2557                  * entries for the trace and we need to ignore the
2558                  * ones before the time stamp.
2559                  */
2560                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2561                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2562                         /* total is the same as the entries */
2563                         *total += count;
2564                 } else
2565                         *total += count +
2566                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2567                 *entries += count;
2568         }
2569 }
2570
2571 static void print_lat_help_header(struct seq_file *m)
2572 {
2573         seq_puts(m, "#                  _------=> CPU#            \n"
2574                     "#                 / _-----=> irqs-off        \n"
2575                     "#                | / _----=> need-resched    \n"
2576                     "#                || / _---=> hardirq/softirq \n"
2577                     "#                ||| / _--=> preempt-depth   \n"
2578                     "#                |||| /     delay            \n"
2579                     "#  cmd     pid   ||||| time  |   caller      \n"
2580                     "#     \\   /      |||||  \\    |   /         \n");
2581 }
2582
2583 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2584 {
2585         unsigned long total;
2586         unsigned long entries;
2587
2588         get_total_entries(buf, &total, &entries);
2589         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2590                    entries, total, num_online_cpus());
2591         seq_puts(m, "#\n");
2592 }
2593
2594 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2595 {
2596         print_event_info(buf, m);
2597         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2598                     "#              | |       |          |         |\n");
2599 }
2600
2601 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2602 {
2603         print_event_info(buf, m);
2604         seq_puts(m, "#                              _-----=> irqs-off\n"
2605                     "#                             / _----=> need-resched\n"
2606                     "#                            | / _---=> hardirq/softirq\n"
2607                     "#                            || / _--=> preempt-depth\n"
2608                     "#                            ||| /     delay\n"
2609                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2610                     "#              | |       |   ||||       |         |\n");
2611 }
2612
2613 void
2614 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2615 {
2616         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2617         struct trace_buffer *buf = iter->trace_buffer;
2618         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2619         struct tracer *type = iter->trace;
2620         unsigned long entries;
2621         unsigned long total;
2622         const char *name = type->name;
2625
2626         get_total_entries(buf, &total, &entries);
2627
2628         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2629                    name, UTS_RELEASE);
2630         seq_puts(m, "# -----------------------------------"
2631                  "---------------------------------\n");
2632         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2633                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2634                    nsecs_to_usecs(data->saved_latency),
2635                    entries,
2636                    total,
2637                    buf->cpu,
2638 #if defined(CONFIG_PREEMPT_NONE)
2639                    "server",
2640 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2641                    "desktop",
2642 #elif defined(CONFIG_PREEMPT)
2643                    "preempt",
2644 #else
2645                    "unknown",
2646 #endif
2647                    /* These are reserved for later use */
2648                    0, 0, 0, 0);
2649 #ifdef CONFIG_SMP
2650         seq_printf(m, " #P:%d)\n", num_online_cpus());
2651 #else
2652         seq_puts(m, ")\n");
2653 #endif
2654         seq_puts(m, "#    -----------------\n");
2655         seq_printf(m, "#    | task: %.16s-%d "
2656                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2657                    data->comm, data->pid,
2658                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2659                    data->policy, data->rt_priority);
2660         seq_puts(m, "#    -----------------\n");
2661
2662         if (data->critical_start) {
2663                 seq_puts(m, "#  => started at: ");
2664                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2665                 trace_print_seq(m, &iter->seq);
2666                 seq_puts(m, "\n#  => ended at:   ");
2667                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2668                 trace_print_seq(m, &iter->seq);
2669                 seq_puts(m, "\n#\n");
2670         }
2671
2672         seq_puts(m, "#\n");
2673 }
2674
2675 static void test_cpu_buff_start(struct trace_iterator *iter)
2676 {
2677         struct trace_seq *s = &iter->seq;
2678         struct trace_array *tr = iter->tr;
2679
2680         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2681                 return;
2682
2683         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2684                 return;
2685
2686         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2687                 return;
2688
2689         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2690                 return;
2691
2692         if (iter->started)
2693                 cpumask_set_cpu(iter->cpu, iter->started);
2694
2695         /* Don't print started cpu buffer for the first entry of the trace */
2696         if (iter->idx > 1)
2697                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2698                                 iter->cpu);
2699 }
2700
2701 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2702 {
2703         struct trace_array *tr = iter->tr;
2704         struct trace_seq *s = &iter->seq;
2705         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2706         struct trace_entry *entry;
2707         struct trace_event *event;
2708
2709         entry = iter->ent;
2710
2711         test_cpu_buff_start(iter);
2712
2713         event = ftrace_find_event(entry->type);
2714
2715         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2716                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2717                         trace_print_lat_context(iter);
2718                 else
2719                         trace_print_context(iter);
2720         }
2721
2722         if (trace_seq_has_overflowed(s))
2723                 return TRACE_TYPE_PARTIAL_LINE;
2724
2725         if (event)
2726                 return event->funcs->trace(iter, sym_flags, event);
2727
2728         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2729
2730         return trace_handle_return(s);
2731 }
2732
2733 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2734 {
2735         struct trace_array *tr = iter->tr;
2736         struct trace_seq *s = &iter->seq;
2737         struct trace_entry *entry;
2738         struct trace_event *event;
2739
2740         entry = iter->ent;
2741
2742         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2743                 trace_seq_printf(s, "%d %d %llu ",
2744                                  entry->pid, iter->cpu, iter->ts);
2745
2746         if (trace_seq_has_overflowed(s))
2747                 return TRACE_TYPE_PARTIAL_LINE;
2748
2749         event = ftrace_find_event(entry->type);
2750         if (event)
2751                 return event->funcs->raw(iter, 0, event);
2752
2753         trace_seq_printf(s, "%d ?\n", entry->type);
2754
2755         return trace_handle_return(s);
2756 }
2757
2758 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2759 {
2760         struct trace_array *tr = iter->tr;
2761         struct trace_seq *s = &iter->seq;
2762         unsigned char newline = '\n';
2763         struct trace_entry *entry;
2764         struct trace_event *event;
2765
2766         entry = iter->ent;
2767
2768         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2769                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2770                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2771                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2772                 if (trace_seq_has_overflowed(s))
2773                         return TRACE_TYPE_PARTIAL_LINE;
2774         }
2775
2776         event = ftrace_find_event(entry->type);
2777         if (event) {
2778                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2779                 if (ret != TRACE_TYPE_HANDLED)
2780                         return ret;
2781         }
2782
2783         SEQ_PUT_FIELD(s, newline);
2784
2785         return trace_handle_return(s);
2786 }
2787
2788 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2789 {
2790         struct trace_array *tr = iter->tr;
2791         struct trace_seq *s = &iter->seq;
2792         struct trace_entry *entry;
2793         struct trace_event *event;
2794
2795         entry = iter->ent;
2796
2797         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2798                 SEQ_PUT_FIELD(s, entry->pid);
2799                 SEQ_PUT_FIELD(s, iter->cpu);
2800                 SEQ_PUT_FIELD(s, iter->ts);
2801                 if (trace_seq_has_overflowed(s))
2802                         return TRACE_TYPE_PARTIAL_LINE;
2803         }
2804
2805         event = ftrace_find_event(entry->type);
2806         return event ? event->funcs->binary(iter, 0, event) :
2807                 TRACE_TYPE_HANDLED;
2808 }
2809
2810 int trace_empty(struct trace_iterator *iter)
2811 {
2812         struct ring_buffer_iter *buf_iter;
2813         int cpu;
2814
2815         /* If we are looking at one CPU buffer, only check that one */
2816         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2817                 cpu = iter->cpu_file;
2818                 buf_iter = trace_buffer_iter(iter, cpu);
2819                 if (buf_iter) {
2820                         if (!ring_buffer_iter_empty(buf_iter))
2821                                 return 0;
2822                 } else {
2823                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2824                                 return 0;
2825                 }
2826                 return 1;
2827         }
2828
2829         for_each_tracing_cpu(cpu) {
2830                 buf_iter = trace_buffer_iter(iter, cpu);
2831                 if (buf_iter) {
2832                         if (!ring_buffer_iter_empty(buf_iter))
2833                                 return 0;
2834                 } else {
2835                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2836                                 return 0;
2837                 }
2838         }
2839
2840         return 1;
2841 }
2842
2843 /*  Called with trace_event_read_lock() held. */
2844 enum print_line_t print_trace_line(struct trace_iterator *iter)
2845 {
2846         struct trace_array *tr = iter->tr;
2847         unsigned long trace_flags = tr->trace_flags;
2848         enum print_line_t ret;
2849
2850         if (iter->lost_events) {
2851                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2852                                  iter->cpu, iter->lost_events);
2853                 if (trace_seq_has_overflowed(&iter->seq))
2854                         return TRACE_TYPE_PARTIAL_LINE;
2855         }
2856
2857         if (iter->trace && iter->trace->print_line) {
2858                 ret = iter->trace->print_line(iter);
2859                 if (ret != TRACE_TYPE_UNHANDLED)
2860                         return ret;
2861         }
2862
2863         if (iter->ent->type == TRACE_BPUTS &&
2864                         trace_flags & TRACE_ITER_PRINTK &&
2865                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2866                 return trace_print_bputs_msg_only(iter);
2867
2868         if (iter->ent->type == TRACE_BPRINT &&
2869                         trace_flags & TRACE_ITER_PRINTK &&
2870                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2871                 return trace_print_bprintk_msg_only(iter);
2872
2873         if (iter->ent->type == TRACE_PRINT &&
2874                         trace_flags & TRACE_ITER_PRINTK &&
2875                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2876                 return trace_print_printk_msg_only(iter);
2877
2878         if (trace_flags & TRACE_ITER_BIN)
2879                 return print_bin_fmt(iter);
2880
2881         if (trace_flags & TRACE_ITER_HEX)
2882                 return print_hex_fmt(iter);
2883
2884         if (trace_flags & TRACE_ITER_RAW)
2885                 return print_raw_fmt(iter);
2886
2887         return print_trace_fmt(iter);
2888 }
2889
2890 void trace_latency_header(struct seq_file *m)
2891 {
2892         struct trace_iterator *iter = m->private;
2893         struct trace_array *tr = iter->tr;
2894
2895         /* print nothing if the buffers are empty */
2896         if (trace_empty(iter))
2897                 return;
2898
2899         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2900                 print_trace_header(m, iter);
2901
2902         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2903                 print_lat_help_header(m);
2904 }
2905
2906 void trace_default_header(struct seq_file *m)
2907 {
2908         struct trace_iterator *iter = m->private;
2909         struct trace_array *tr = iter->tr;
2910         unsigned long trace_flags = tr->trace_flags;
2911
2912         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2913                 return;
2914
2915         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2916                 /* print nothing if the buffers are empty */
2917                 if (trace_empty(iter))
2918                         return;
2919                 print_trace_header(m, iter);
2920                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2921                         print_lat_help_header(m);
2922         } else {
2923                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2924                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2925                                 print_func_help_header_irq(iter->trace_buffer, m);
2926                         else
2927                                 print_func_help_header(iter->trace_buffer, m);
2928                 }
2929         }
2930 }
2931
2932 static void test_ftrace_alive(struct seq_file *m)
2933 {
2934         if (!ftrace_is_dead())
2935                 return;
2936         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2937                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2938 }
2939
2940 #ifdef CONFIG_TRACER_MAX_TRACE
2941 static void show_snapshot_main_help(struct seq_file *m)
2942 {
2943         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2944                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2945                     "#                      Takes a snapshot of the main buffer.\n"
2946                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2947                     "#                      (Doesn't have to be '2' works with any number that\n"
2948                     "#                       is not a '0' or '1')\n");
2949 }
2950
2951 static void show_snapshot_percpu_help(struct seq_file *m)
2952 {
2953         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2954 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2955         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2956                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2957 #else
2958         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2959                     "#                     Must use main snapshot file to allocate.\n");
2960 #endif
2961         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2962                     "#                      (Doesn't have to be '2' works with any number that\n"
2963                     "#                       is not a '0' or '1')\n");
2964 }
2965
2966 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2967 {
2968         if (iter->tr->allocated_snapshot)
2969                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2970         else
2971                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2972
2973         seq_puts(m, "# Snapshot commands:\n");
2974         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2975                 show_snapshot_main_help(m);
2976         else
2977                 show_snapshot_percpu_help(m);
2978 }
2979 #else
2980 /* Should never be called */
2981 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2982 #endif
2983
2984 static int s_show(struct seq_file *m, void *v)
2985 {
2986         struct trace_iterator *iter = v;
2987         int ret;
2988
2989         if (iter->ent == NULL) {
2990                 if (iter->tr) {
2991                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2992                         seq_puts(m, "#\n");
2993                         test_ftrace_alive(m);
2994                 }
2995                 if (iter->snapshot && trace_empty(iter))
2996                         print_snapshot_help(m, iter);
2997                 else if (iter->trace && iter->trace->print_header)
2998                         iter->trace->print_header(m);
2999                 else
3000                         trace_default_header(m);
3001
3002         } else if (iter->leftover) {
3003                 /*
3004                  * If we filled the seq_file buffer earlier, we
3005                  * want to just show it now.
3006                  */
3007                 ret = trace_print_seq(m, &iter->seq);
3008
3009                 /* ret should this time be zero, but you never know */
3010                 iter->leftover = ret;
3011
3012         } else {
3013                 print_trace_line(iter);
3014                 ret = trace_print_seq(m, &iter->seq);
3015                 /*
3016                  * If we overflow the seq_file buffer, then it will
3017                  * ask us for this data again at start up.
3018                  * Use that instead.
3019                  *  ret is 0 if seq_file write succeeded.
3020                  *        -1 otherwise.
3021                  */
3022                 iter->leftover = ret;
3023         }
3024
3025         return 0;
3026 }
3027
3028 /*
3029  * Should be used after trace_array_get(); trace_types_lock
3030  * ensures that i_cdev was already initialized.
3031  */
3032 static inline int tracing_get_cpu(struct inode *inode)
3033 {
3034         if (inode->i_cdev) /* See trace_create_cpu_file() */
3035                 return (long)inode->i_cdev - 1;
3036         return RING_BUFFER_ALL_CPUS;
3037 }
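
/*
 * Illustrative note: the per-cpu files store (cpu + 1) in i_cdev, so a
 * NULL i_cdev decodes to RING_BUFFER_ALL_CPUS while, for example,
 * i_cdev == (void *)3 decodes to cpu 2.
 */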
3038
3039 static const struct seq_operations tracer_seq_ops = {
3040         .start          = s_start,
3041         .next           = s_next,
3042         .stop           = s_stop,
3043         .show           = s_show,
3044 };
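
/*
 * These callbacks implement the seq_file interface behind the "trace"
 * file, so a plain
 *
 *        cat /sys/kernel/tracing/trace
 *
 * (or the older /sys/kernel/debug/tracing/trace path) walks the ring
 * buffer through s_start()/s_next()/s_show()/s_stop() without consuming
 * its contents.
 */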
3045
3046 static struct trace_iterator *
3047 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3048 {
3049         struct trace_array *tr = inode->i_private;
3050         struct trace_iterator *iter;
3051         int cpu;
3052
3053         if (tracing_disabled)
3054                 return ERR_PTR(-ENODEV);
3055
3056         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3057         if (!iter)
3058                 return ERR_PTR(-ENOMEM);
3059
3060         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3061                                     GFP_KERNEL);
3062         if (!iter->buffer_iter)
3063                 goto release;
3064
3065         /*
3066          * We make a copy of the current tracer to avoid concurrent
3067          * changes on it while we are reading.
3068          */
3069         mutex_lock(&trace_types_lock);
3070         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3071         if (!iter->trace)
3072                 goto fail;
3073
3074         *iter->trace = *tr->current_trace;
3075
3076         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3077                 goto fail;
3078
3079         iter->tr = tr;
3080
3081 #ifdef CONFIG_TRACER_MAX_TRACE
3082         /* Currently only the top directory has a snapshot */
3083         if (tr->current_trace->print_max || snapshot)
3084                 iter->trace_buffer = &tr->max_buffer;
3085         else
3086 #endif
3087                 iter->trace_buffer = &tr->trace_buffer;
3088         iter->snapshot = snapshot;
3089         iter->pos = -1;
3090         iter->cpu_file = tracing_get_cpu(inode);
3091         mutex_init(&iter->mutex);
3092
3093         /* Notify the tracer early; before we stop tracing. */
3094         if (iter->trace && iter->trace->open)
3095                 iter->trace->open(iter);
3096
3097         /* Annotate start of buffers if we had overruns */
3098         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3099                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3100
3101         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3102         if (trace_clocks[tr->clock_id].in_ns)
3103                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3104
3105         /* stop the trace while dumping if we are not opening "snapshot" */
3106         if (!iter->snapshot)
3107                 tracing_stop_tr(tr);
3108
3109         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3110                 for_each_tracing_cpu(cpu) {
3111                         iter->buffer_iter[cpu] =
3112                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3113                                                          cpu, GFP_KERNEL);
3114                 }
3115                 ring_buffer_read_prepare_sync();
3116                 for_each_tracing_cpu(cpu) {
3117                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3118                         tracing_iter_reset(iter, cpu);
3119                 }
3120         } else {
3121                 cpu = iter->cpu_file;
3122                 iter->buffer_iter[cpu] =
3123                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3124                                                  cpu, GFP_KERNEL);
3125                 ring_buffer_read_prepare_sync();
3126                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3127                 tracing_iter_reset(iter, cpu);
3128         }
3129
3130         mutex_unlock(&trace_types_lock);
3131
3132         return iter;
3133
3134  fail:
3135         mutex_unlock(&trace_types_lock);
3136         kfree(iter->trace);
3137         kfree(iter->buffer_iter);
3138 release:
3139         seq_release_private(inode, file);
3140         return ERR_PTR(-ENOMEM);
3141 }
3142
3143 int tracing_open_generic(struct inode *inode, struct file *filp)
3144 {
3145         if (tracing_disabled)
3146                 return -ENODEV;
3147
3148         filp->private_data = inode->i_private;
3149         return 0;
3150 }
3151
3152 bool tracing_is_disabled(void)
3153 {
3154         return (tracing_disabled) ? true : false;
3155 }
3156
3157 /*
3158  * Open and update trace_array ref count.
3159  * Must have the current trace_array passed to it.
3160  */
3161 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3162 {
3163         struct trace_array *tr = inode->i_private;
3164
3165         if (tracing_disabled)
3166                 return -ENODEV;
3167
3168         if (trace_array_get(tr) < 0)
3169                 return -ENODEV;
3170
3171         filp->private_data = inode->i_private;
3172
3173         return 0;
3174 }
3175
3176 static int tracing_release(struct inode *inode, struct file *file)
3177 {
3178         struct trace_array *tr = inode->i_private;
3179         struct seq_file *m = file->private_data;
3180         struct trace_iterator *iter;
3181         int cpu;
3182
3183         if (!(file->f_mode & FMODE_READ)) {
3184                 trace_array_put(tr);
3185                 return 0;
3186         }
3187
3188         /* Writes do not use seq_file */
3189         iter = m->private;
3190         mutex_lock(&trace_types_lock);
3191
3192         for_each_tracing_cpu(cpu) {
3193                 if (iter->buffer_iter[cpu])
3194                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3195         }
3196
3197         if (iter->trace && iter->trace->close)
3198                 iter->trace->close(iter);
3199
3200         if (!iter->snapshot)
3201                 /* reenable tracing if it was previously enabled */
3202                 tracing_start_tr(tr);
3203
3204         __trace_array_put(tr);
3205
3206         mutex_unlock(&trace_types_lock);
3207
3208         mutex_destroy(&iter->mutex);
3209         free_cpumask_var(iter->started);
3210         kfree(iter->trace);
3211         kfree(iter->buffer_iter);
3212         seq_release_private(inode, file);
3213
3214         return 0;
3215 }
3216
3217 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3218 {
3219         struct trace_array *tr = inode->i_private;
3220
3221         trace_array_put(tr);
3222         return 0;
3223 }
3224
3225 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3226 {
3227         struct trace_array *tr = inode->i_private;
3228
3229         trace_array_put(tr);
3230
3231         return single_release(inode, file);
3232 }
3233
3234 static int tracing_open(struct inode *inode, struct file *file)
3235 {
3236         struct trace_array *tr = inode->i_private;
3237         struct trace_iterator *iter;
3238         int ret = 0;
3239
3240         if (trace_array_get(tr) < 0)
3241                 return -ENODEV;
3242
3243         /* If this file was opened for write, then erase its contents */
3244         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3245                 int cpu = tracing_get_cpu(inode);
3246                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3247
3248 #ifdef CONFIG_TRACER_MAX_TRACE
3249                 if (tr->current_trace->print_max)
3250                         trace_buf = &tr->max_buffer;
3251 #endif
3252
3253                 if (cpu == RING_BUFFER_ALL_CPUS)
3254                         tracing_reset_online_cpus(trace_buf);
3255                 else
3256                         tracing_reset(trace_buf, cpu);
3257         }
3258
3259         if (file->f_mode & FMODE_READ) {
3260                 iter = __tracing_open(inode, file, false);
3261                 if (IS_ERR(iter))
3262                         ret = PTR_ERR(iter);
3263                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3264                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3265         }
3266
3267         if (ret < 0)
3268                 trace_array_put(tr);
3269
3270         return ret;
3271 }
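
/*
 * Illustrative note (not from the original source): the O_TRUNC path above
 * is what makes a truncating write from the shell clear the ring buffer,
 * e.g. "echo > trace" clears every per-CPU buffer, while writing to a
 * per-CPU trace file (per_cpu/cpuN/trace, path assumed from the tracefs
 * layout) clears only that CPU.
 */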
3272
3273 /*
3274  * Some tracers are not suitable for instance buffers.
3275  * A tracer is always available for the global array (toplevel)
3276  * or for any array if it explicitly allows instances.
3277  */
3278 static bool
3279 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3280 {
3281         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3282 }
3283
3284 /* Find the next tracer that this trace array may use */
3285 static struct tracer *
3286 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3287 {
3288         while (t && !trace_ok_for_array(t, tr))
3289                 t = t->next;
3290
3291         return t;
3292 }
3293
3294 static void *
3295 t_next(struct seq_file *m, void *v, loff_t *pos)
3296 {
3297         struct trace_array *tr = m->private;
3298         struct tracer *t = v;
3299
3300         (*pos)++;
3301
3302         if (t)
3303                 t = get_tracer_for_array(tr, t->next);
3304
3305         return t;
3306 }
3307
3308 static void *t_start(struct seq_file *m, loff_t *pos)
3309 {
3310         struct trace_array *tr = m->private;
3311         struct tracer *t;
3312         loff_t l = 0;
3313
3314         mutex_lock(&trace_types_lock);
3315
3316         t = get_tracer_for_array(tr, trace_types);
3317         for (; t && l < *pos; t = t_next(m, t, &l))
3318                 ;
3319
3320         return t;
3321 }
3322
3323 static void t_stop(struct seq_file *m, void *p)
3324 {
3325         mutex_unlock(&trace_types_lock);
3326 }
3327
3328 static int t_show(struct seq_file *m, void *v)
3329 {
3330         struct tracer *t = v;
3331
3332         if (!t)
3333                 return 0;
3334
3335         seq_puts(m, t->name);
3336         if (t->next)
3337                 seq_putc(m, ' ');
3338         else
3339                 seq_putc(m, '\n');
3340
3341         return 0;
3342 }
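
/*
 * Sketch of the resulting "available_tracers" output (the tracer names
 * below are only examples; the real list depends on the kernel config):
 * names are printed space-separated on a single line, e.g.
 *
 *   # cat available_tracers
 *   blk function_graph function nop
 */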
3343
3344 static const struct seq_operations show_traces_seq_ops = {
3345         .start          = t_start,
3346         .next           = t_next,
3347         .stop           = t_stop,
3348         .show           = t_show,
3349 };
3350
3351 static int show_traces_open(struct inode *inode, struct file *file)
3352 {
3353         struct trace_array *tr = inode->i_private;
3354         struct seq_file *m;
3355         int ret;
3356
3357         if (tracing_disabled)
3358                 return -ENODEV;
3359
3360         if (trace_array_get(tr) < 0)
3361                 return -ENODEV;
3362
3363         ret = seq_open(file, &show_traces_seq_ops);
3364         if (ret) {
3365                 trace_array_put(tr);
3366                 return ret;
3367         }
3368
3369         m = file->private_data;
3370         m->private = tr;
3371
3372         return 0;
3373 }
3374
3375 static int show_traces_release(struct inode *inode, struct file *file)
3376 {
3377         struct trace_array *tr = inode->i_private;
3378
3379         trace_array_put(tr);
3380         return seq_release(inode, file);
3381 }
3382
3383 static ssize_t
3384 tracing_write_stub(struct file *filp, const char __user *ubuf,
3385                    size_t count, loff_t *ppos)
3386 {
3387         return count;
3388 }
3389
3390 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3391 {
3392         int ret;
3393
3394         if (file->f_mode & FMODE_READ)
3395                 ret = seq_lseek(file, offset, whence);
3396         else
3397                 file->f_pos = ret = 0;
3398
3399         return ret;
3400 }
3401
3402 static const struct file_operations tracing_fops = {
3403         .open           = tracing_open,
3404         .read           = seq_read,
3405         .write          = tracing_write_stub,
3406         .llseek         = tracing_lseek,
3407         .release        = tracing_release,
3408 };
3409
3410 static const struct file_operations show_traces_fops = {
3411         .open           = show_traces_open,
3412         .read           = seq_read,
3413         .llseek         = seq_lseek,
3414         .release        = show_traces_release,
3415 };
3416
3417 static ssize_t
3418 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3419                      size_t count, loff_t *ppos)
3420 {
3421         struct trace_array *tr = file_inode(filp)->i_private;
3422         char *mask_str;
3423         int len;
3424
3425         len = snprintf(NULL, 0, "%*pb\n",
3426                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3427         mask_str = kmalloc(len, GFP_KERNEL);
3428         if (!mask_str)
3429                 return -ENOMEM;
3430
3431         len = snprintf(mask_str, len, "%*pb\n",
3432                        cpumask_pr_args(tr->tracing_cpumask));
3433         if (len >= count) {
3434                 count = -EINVAL;
3435                 goto out_err;
3436         }
3437         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3438
3439 out_err:
3440         kfree(mask_str);
3441
3442         return count;
3443 }
3444
3445 static ssize_t
3446 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3447                       size_t count, loff_t *ppos)
3448 {
3449         struct trace_array *tr = file_inode(filp)->i_private;
3450         cpumask_var_t tracing_cpumask_new;
3451         int err, cpu;
3452
3453         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3454                 return -ENOMEM;
3455
3456         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3457         if (err)
3458                 goto err_unlock;
3459
3460         local_irq_disable();
3461         arch_spin_lock(&tr->max_lock);
3462         for_each_tracing_cpu(cpu) {
3463                 /*
3464                  * Increase/decrease the disabled counter if we are
3465                  * about to flip a bit in the cpumask:
3466                  */
3467                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3468                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3469                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3470                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3471                 }
3472                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3473                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3474                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3475                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3476                 }
3477         }
3478         arch_spin_unlock(&tr->max_lock);
3479         local_irq_enable();
3480
3481         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3482         free_cpumask_var(tracing_cpumask_new);
3483
3484         return count;
3485
3486 err_unlock:
3487         free_cpumask_var(tracing_cpumask_new);
3488
3489         return err;
3490 }
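
/*
 * Usage sketch (values are illustrative): the mask is parsed by
 * cpumask_parse_user(), so it is written as a hex CPU bitmap.
 *
 *   # echo 3 > tracing_cpumask     trace only CPUs 0 and 1
 *   # cat tracing_cpumask          print the current mask, e.g. "f" on a
 *                                  4-CPU machine
 */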
3491
3492 static const struct file_operations tracing_cpumask_fops = {
3493         .open           = tracing_open_generic_tr,
3494         .read           = tracing_cpumask_read,
3495         .write          = tracing_cpumask_write,
3496         .release        = tracing_release_generic_tr,
3497         .llseek         = generic_file_llseek,
3498 };
3499
3500 static int tracing_trace_options_show(struct seq_file *m, void *v)
3501 {
3502         struct tracer_opt *trace_opts;
3503         struct trace_array *tr = m->private;
3504         u32 tracer_flags;
3505         int i;
3506
3507         mutex_lock(&trace_types_lock);
3508         tracer_flags = tr->current_trace->flags->val;
3509         trace_opts = tr->current_trace->flags->opts;
3510
3511         for (i = 0; trace_options[i]; i++) {
3512                 if (tr->trace_flags & (1 << i))
3513                         seq_printf(m, "%s\n", trace_options[i]);
3514                 else
3515                         seq_printf(m, "no%s\n", trace_options[i]);
3516         }
3517
3518         for (i = 0; trace_opts[i].name; i++) {
3519                 if (tracer_flags & trace_opts[i].bit)
3520                         seq_printf(m, "%s\n", trace_opts[i].name);
3521                 else
3522                         seq_printf(m, "no%s\n", trace_opts[i].name);
3523         }
3524         mutex_unlock(&trace_types_lock);
3525
3526         return 0;
3527 }
3528
3529 static int __set_tracer_option(struct trace_array *tr,
3530                                struct tracer_flags *tracer_flags,
3531                                struct tracer_opt *opts, int neg)
3532 {
3533         struct tracer *trace = tr->current_trace;
3534         int ret;
3535
3536         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3537         if (ret)
3538                 return ret;
3539
3540         if (neg)
3541                 tracer_flags->val &= ~opts->bit;
3542         else
3543                 tracer_flags->val |= opts->bit;
3544         return 0;
3545 }
3546
3547 /* Try to assign a tracer specific option */
3548 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3549 {
3550         struct tracer *trace = tr->current_trace;
3551         struct tracer_flags *tracer_flags = trace->flags;
3552         struct tracer_opt *opts = NULL;
3553         int i;
3554
3555         for (i = 0; tracer_flags->opts[i].name; i++) {
3556                 opts = &tracer_flags->opts[i];
3557
3558                 if (strcmp(cmp, opts->name) == 0)
3559                         return __set_tracer_option(tr, trace->flags, opts, neg);
3560         }
3561
3562         return -EINVAL;
3563 }
3564
3565 /* Some tracers require overwrite to stay enabled */
3566 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3567 {
3568         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3569                 return -1;
3570
3571         return 0;
3572 }
3573
3574 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3575 {
3576         /* do nothing if flag is already set */
3577         if (!!(tr->trace_flags & mask) == !!enabled)
3578                 return 0;
3579
3580         /* Give the tracer a chance to approve the change */
3581         if (tr->current_trace->flag_changed)
3582                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3583                         return -EINVAL;
3584
3585         if (enabled)
3586                 tr->trace_flags |= mask;
3587         else
3588                 tr->trace_flags &= ~mask;
3589
3590         if (mask == TRACE_ITER_RECORD_CMD)
3591                 trace_event_enable_cmd_record(enabled);
3592
3593         if (mask == TRACE_ITER_OVERWRITE) {
3594                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3595 #ifdef CONFIG_TRACER_MAX_TRACE
3596                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3597 #endif
3598         }
3599
3600         if (mask == TRACE_ITER_PRINTK) {
3601                 trace_printk_start_stop_comm(enabled);
3602                 trace_printk_control(enabled);
3603         }
3604
3605         return 0;
3606 }
3607
3608 static int trace_set_options(struct trace_array *tr, char *option)
3609 {
3610         char *cmp;
3611         int neg = 0;
3612         int ret = -ENODEV;
3613         int i;
3614         size_t orig_len = strlen(option);
3615
3616         cmp = strstrip(option);
3617
3618         if (strncmp(cmp, "no", 2) == 0) {
3619                 neg = 1;
3620                 cmp += 2;
3621         }
3622
3623         mutex_lock(&trace_types_lock);
3624
3625         for (i = 0; trace_options[i]; i++) {
3626                 if (strcmp(cmp, trace_options[i]) == 0) {
3627                         ret = set_tracer_flag(tr, 1 << i, !neg);
3628                         break;
3629                 }
3630         }
3631
3632         /* If no option could be set, test the specific tracer options */
3633         if (!trace_options[i])
3634                 ret = set_tracer_option(tr, cmp, neg);
3635
3636         mutex_unlock(&trace_types_lock);
3637
3638         /*
3639          * If the first trailing whitespace is replaced with '\0' by strstrip,
3640          * turn it back into a space.
3641          */
3642         if (orig_len > strlen(option))
3643                 option[strlen(option)] = ' ';
3644
3645         return ret;
3646 }
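
/*
 * Example of the "no" prefix handling above (the option name is only an
 * example; the trace_options file lists the real ones):
 *
 *   # echo print-parent > trace_options      set the flag
 *   # echo noprint-parent > trace_options    clear the same flag
 */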
3647
3648 static void __init apply_trace_boot_options(void)
3649 {
3650         char *buf = trace_boot_options_buf;
3651         char *option;
3652
3653         while (true) {
3654                 option = strsep(&buf, ",");
3655
3656                 if (!option)
3657                         break;
3658
3659                 if (*option)
3660                         trace_set_options(&global_trace, option);
3661
3662                 /* Put back the comma to allow this to be called again */
3663                 if (buf)
3664                         *(buf - 1) = ',';
3665         }
3666 }
3667
3668 static ssize_t
3669 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3670                         size_t cnt, loff_t *ppos)
3671 {
3672         struct seq_file *m = filp->private_data;
3673         struct trace_array *tr = m->private;
3674         char buf[64];
3675         int ret;
3676
3677         if (cnt >= sizeof(buf))
3678                 return -EINVAL;
3679
3680         if (copy_from_user(&buf, ubuf, cnt))
3681                 return -EFAULT;
3682
3683         buf[cnt] = 0;
3684
3685         ret = trace_set_options(tr, buf);
3686         if (ret < 0)
3687                 return ret;
3688
3689         *ppos += cnt;
3690
3691         return cnt;
3692 }
3693
3694 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3695 {
3696         struct trace_array *tr = inode->i_private;
3697         int ret;
3698
3699         if (tracing_disabled)
3700                 return -ENODEV;
3701
3702         if (trace_array_get(tr) < 0)
3703                 return -ENODEV;
3704
3705         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3706         if (ret < 0)
3707                 trace_array_put(tr);
3708
3709         return ret;
3710 }
3711
3712 static const struct file_operations tracing_iter_fops = {
3713         .open           = tracing_trace_options_open,
3714         .read           = seq_read,
3715         .llseek         = seq_lseek,
3716         .release        = tracing_single_release_tr,
3717         .write          = tracing_trace_options_write,
3718 };
3719
3720 static const char readme_msg[] =
3721         "tracing mini-HOWTO:\n\n"
3722         "# echo 0 > tracing_on : quick way to disable tracing\n"
3723         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3724         " Important files:\n"
3725         "  trace\t\t\t- The static contents of the buffer\n"
3726         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3727         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3728         "  current_tracer\t- function and latency tracers\n"
3729         "  available_tracers\t- list of configured tracers for current_tracer\n"
3730         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3731         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3732         "  trace_clock\t\t- change the clock used to order events\n"
3733         "       local:   Per cpu clock but may not be synced across CPUs\n"
3734         "      global:   Synced across CPUs but slows tracing down.\n"
3735         "     counter:   Not a clock, but just an increment\n"
3736         "      uptime:   Jiffy counter from time of boot\n"
3737         "        perf:   Same clock that perf events use\n"
3738 #ifdef CONFIG_X86_64
3739         "     x86-tsc:   TSC cycle counter\n"
3740 #endif
3741         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3742         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3743         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3744         "\t\t\t  Remove sub-buffer with rmdir\n"
3745         "  trace_options\t\t- Set format or modify how tracing happens\n"
3746         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3747         "\t\t\t  option name\n"
3748         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store\n"
3749 #ifdef CONFIG_DYNAMIC_FTRACE
3750         "\n  available_filter_functions - list of functions that can be filtered on\n"
3751         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3752         "\t\t\t  functions\n"
3753         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3754         "\t     modules: Can select a group via module\n"
3755         "\t      Format: :mod:<module-name>\n"
3756         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3757         "\t    triggers: a command to perform when function is hit\n"
3758         "\t      Format: <function>:<trigger>[:count]\n"
3759         "\t     trigger: traceon, traceoff\n"
3760         "\t\t      enable_event:<system>:<event>\n"
3761         "\t\t      disable_event:<system>:<event>\n"
3762 #ifdef CONFIG_STACKTRACE
3763         "\t\t      stacktrace\n"
3764 #endif
3765 #ifdef CONFIG_TRACER_SNAPSHOT
3766         "\t\t      snapshot\n"
3767 #endif
3768         "\t\t      dump\n"
3769         "\t\t      cpudump\n"
3770         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3771         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3772         "\t     The first one will disable tracing every time do_fault is hit\n"
3773         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3774         "\t       The first time do_trap is hit and it disables tracing, the\n"
3775         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3776         "\t       the counter will not decrement. It only decrements when the\n"
3777         "\t       trigger did work\n"
3778         "\t     To remove trigger without count:\n"
3779         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3780         "\t     To remove trigger with a count:\n"
3781         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3782         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3783         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3784         "\t    modules: Can select a group via module command :mod:\n"
3785         "\t    Does not accept triggers\n"
3786 #endif /* CONFIG_DYNAMIC_FTRACE */
3787 #ifdef CONFIG_FUNCTION_TRACER
3788         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3789         "\t\t    (function)\n"
3790 #endif
3791 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3792         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3793         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3794         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3795 #endif
3796 #ifdef CONFIG_TRACER_SNAPSHOT
3797         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3798         "\t\t\t  snapshot buffer. Read the contents for more\n"
3799         "\t\t\t  information\n"
3800 #endif
3801 #ifdef CONFIG_STACK_TRACER
3802         "  stack_trace\t\t- Shows the max stack trace when active\n"
3803         "  stack_max_size\t- Shows current max stack size that was traced\n"
3804         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3805         "\t\t\t  new trace)\n"
3806 #ifdef CONFIG_DYNAMIC_FTRACE
3807         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3808         "\t\t\t  traces\n"
3809 #endif
3810 #endif /* CONFIG_STACK_TRACER */
3811         "  events/\t\t- Directory containing all trace event subsystems:\n"
3812         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3813         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3814         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3815         "\t\t\t  events\n"
3816         "      filter\t\t- If set, only events passing filter are traced\n"
3817         "  events/<system>/<event>/\t- Directory containing control files for\n"
3818         "\t\t\t  <event>:\n"
3819         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3820         "      filter\t\t- If set, only events passing filter are traced\n"
3821         "      trigger\t\t- If set, a command to perform when event is hit\n"
3822         "\t    Format: <trigger>[:count][if <filter>]\n"
3823         "\t   trigger: traceon, traceoff\n"
3824         "\t            enable_event:<system>:<event>\n"
3825         "\t            disable_event:<system>:<event>\n"
3826 #ifdef CONFIG_STACKTRACE
3827         "\t\t    stacktrace\n"
3828 #endif
3829 #ifdef CONFIG_TRACER_SNAPSHOT
3830         "\t\t    snapshot\n"
3831 #endif
3832         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3833         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3834         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3835         "\t                  events/block/block_unplug/trigger\n"
3836         "\t   The first disables tracing every time block_unplug is hit.\n"
3837         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3838         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3839         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3840         "\t   Like function triggers, the counter is only decremented if it\n"
3841         "\t    enabled or disabled tracing.\n"
3842         "\t   To remove a trigger without a count:\n"
3843         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3844         "\t   To remove a trigger with a count:\n"
3845         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3846         "\t   Filters can be ignored when removing a trigger.\n"
3847 ;
3848
3849 static ssize_t
3850 tracing_readme_read(struct file *filp, char __user *ubuf,
3851                        size_t cnt, loff_t *ppos)
3852 {
3853         return simple_read_from_buffer(ubuf, cnt, ppos,
3854                                         readme_msg, strlen(readme_msg));
3855 }
3856
3857 static const struct file_operations tracing_readme_fops = {
3858         .open           = tracing_open_generic,
3859         .read           = tracing_readme_read,
3860         .llseek         = generic_file_llseek,
3861 };
3862
3863 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3864 {
3865         unsigned int *ptr = v;
3866
3867         if (*pos || m->count)
3868                 ptr++;
3869
3870         (*pos)++;
3871
3872         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3873              ptr++) {
3874                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3875                         continue;
3876
3877                 return ptr;
3878         }
3879
3880         return NULL;
3881 }
3882
3883 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3884 {
3885         void *v;
3886         loff_t l = 0;
3887
3888         preempt_disable();
3889         arch_spin_lock(&trace_cmdline_lock);
3890
3891         v = &savedcmd->map_cmdline_to_pid[0];
3892         while (l <= *pos) {
3893                 v = saved_cmdlines_next(m, v, &l);
3894                 if (!v)
3895                         return NULL;
3896         }
3897
3898         return v;
3899 }
3900
3901 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3902 {
3903         arch_spin_unlock(&trace_cmdline_lock);
3904         preempt_enable();
3905 }
3906
3907 static int saved_cmdlines_show(struct seq_file *m, void *v)
3908 {
3909         char buf[TASK_COMM_LEN];
3910         unsigned int *pid = v;
3911
3912         __trace_find_cmdline(*pid, buf);
3913         seq_printf(m, "%d %s\n", *pid, buf);
3914         return 0;
3915 }
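
/*
 * Each line of the "saved_cmdlines" file is therefore "<pid> <comm>",
 * for example "1234 bash" (values made up for illustration).
 */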
3916
3917 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3918         .start          = saved_cmdlines_start,
3919         .next           = saved_cmdlines_next,
3920         .stop           = saved_cmdlines_stop,
3921         .show           = saved_cmdlines_show,
3922 };
3923
3924 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3925 {
3926         if (tracing_disabled)
3927                 return -ENODEV;
3928
3929         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3930 }
3931
3932 static const struct file_operations tracing_saved_cmdlines_fops = {
3933         .open           = tracing_saved_cmdlines_open,
3934         .read           = seq_read,
3935         .llseek         = seq_lseek,
3936         .release        = seq_release,
3937 };
3938
3939 static ssize_t
3940 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3941                                  size_t cnt, loff_t *ppos)
3942 {
3943         char buf[64];
3944         int r;
3945
3946         arch_spin_lock(&trace_cmdline_lock);
3947         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3948         arch_spin_unlock(&trace_cmdline_lock);
3949
3950         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3951 }
3952
3953 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3954 {
3955         kfree(s->saved_cmdlines);
3956         kfree(s->map_cmdline_to_pid);
3957         kfree(s);
3958 }
3959
3960 static int tracing_resize_saved_cmdlines(unsigned int val)
3961 {
3962         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3963
3964         s = kmalloc(sizeof(*s), GFP_KERNEL);
3965         if (!s)
3966                 return -ENOMEM;
3967
3968         if (allocate_cmdlines_buffer(val, s) < 0) {
3969                 kfree(s);
3970                 return -ENOMEM;
3971         }
3972
3973         arch_spin_lock(&trace_cmdline_lock);
3974         savedcmd_temp = savedcmd;
3975         savedcmd = s;
3976         arch_spin_unlock(&trace_cmdline_lock);
3977         free_saved_cmdlines_buffer(savedcmd_temp);
3978
3979         return 0;
3980 }
3981
3982 static ssize_t
3983 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3984                                   size_t cnt, loff_t *ppos)
3985 {
3986         unsigned long val;
3987         int ret;
3988
3989         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3990         if (ret)
3991                 return ret;
3992
3993         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3994         if (!val || val > PID_MAX_DEFAULT)
3995                 return -EINVAL;
3996
3997         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3998         if (ret < 0)
3999                 return ret;
4000
4001         *ppos += cnt;
4002
4003         return cnt;
4004 }
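
/*
 * Usage sketch (1024 is only an example value; the limit checked above is
 * PID_MAX_DEFAULT):
 *
 *   # echo 1024 > saved_cmdlines_size    keep up to 1024 comm-pid entries
 *   # cat saved_cmdlines_size            read the current size back
 */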
4005
4006 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4007         .open           = tracing_open_generic,
4008         .read           = tracing_saved_cmdlines_size_read,
4009         .write          = tracing_saved_cmdlines_size_write,
4010 };
4011
4012 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4013 static union trace_enum_map_item *
4014 update_enum_map(union trace_enum_map_item *ptr)
4015 {
4016         if (!ptr->map.enum_string) {
4017                 if (ptr->tail.next) {
4018                         ptr = ptr->tail.next;
4019                         /* Set ptr to the next real item (skip head) */
4020                         ptr++;
4021                 } else
4022                         return NULL;
4023         }
4024         return ptr;
4025 }
4026
4027 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4028 {
4029         union trace_enum_map_item *ptr = v;
4030
4031         /*
4032          * Paranoid! If ptr points to end, we don't want to increment past it.
4033          * This really should never happen.
4034          */
4035         ptr = update_enum_map(ptr);
4036         if (WARN_ON_ONCE(!ptr))
4037                 return NULL;
4038
4039         ptr++;
4040
4041         (*pos)++;
4042
4043         ptr = update_enum_map(ptr);
4044
4045         return ptr;
4046 }
4047
4048 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4049 {
4050         union trace_enum_map_item *v;
4051         loff_t l = 0;
4052
4053         mutex_lock(&trace_enum_mutex);
4054
4055         v = trace_enum_maps;
4056         if (v)
4057                 v++;
4058
4059         while (v && l < *pos) {
4060                 v = enum_map_next(m, v, &l);
4061         }
4062
4063         return v;
4064 }
4065
4066 static void enum_map_stop(struct seq_file *m, void *v)
4067 {
4068         mutex_unlock(&trace_enum_mutex);
4069 }
4070
4071 static int enum_map_show(struct seq_file *m, void *v)
4072 {
4073         union trace_enum_map_item *ptr = v;
4074
4075         seq_printf(m, "%s %ld (%s)\n",
4076                    ptr->map.enum_string, ptr->map.enum_value,
4077                    ptr->map.system);
4078
4079         return 0;
4080 }
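
/*
 * A line of the "enum_map" file thus reads "<name> <value> (<system>)",
 * e.g. "HI_SOFTIRQ 0 (irq)"; the actual entries depend on which enums the
 * build exports.
 */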
4081
4082 static const struct seq_operations tracing_enum_map_seq_ops = {
4083         .start          = enum_map_start,
4084         .next           = enum_map_next,
4085         .stop           = enum_map_stop,
4086         .show           = enum_map_show,
4087 };
4088
4089 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4090 {
4091         if (tracing_disabled)
4092                 return -ENODEV;
4093
4094         return seq_open(filp, &tracing_enum_map_seq_ops);
4095 }
4096
4097 static const struct file_operations tracing_enum_map_fops = {
4098         .open           = tracing_enum_map_open,
4099         .read           = seq_read,
4100         .llseek         = seq_lseek,
4101         .release        = seq_release,
4102 };
4103
4104 static inline union trace_enum_map_item *
4105 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4106 {
4107         /* Return tail of array given the head */
4108         return ptr + ptr->head.length + 1;
4109 }
4110
4111 static void
4112 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4113                            int len)
4114 {
4115         struct trace_enum_map **stop;
4116         struct trace_enum_map **map;
4117         union trace_enum_map_item *map_array;
4118         union trace_enum_map_item *ptr;
4119
4120         stop = start + len;
4121
4122         /*
4123          * The trace_enum_maps contains the map plus a head and tail item,
4124          * where the head holds the module and length of array, and the
4125          * tail holds a pointer to the next list.
4126          */
4127         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4128         if (!map_array) {
4129                 pr_warning("Unable to allocate trace enum mapping\n");
4130                 return;
4131         }
4132
4133         mutex_lock(&trace_enum_mutex);
4134
4135         if (!trace_enum_maps)
4136                 trace_enum_maps = map_array;
4137         else {
4138                 ptr = trace_enum_maps;
4139                 for (;;) {
4140                         ptr = trace_enum_jmp_to_tail(ptr);
4141                         if (!ptr->tail.next)
4142                                 break;
4143                         ptr = ptr->tail.next;
4144
4145                 }
4146                 ptr->tail.next = map_array;
4147         }
4148         map_array->head.mod = mod;
4149         map_array->head.length = len;
4150         map_array++;
4151
4152         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4153                 map_array->map = **map;
4154                 map_array++;
4155         }
4156         memset(map_array, 0, sizeof(*map_array));
4157
4158         mutex_unlock(&trace_enum_mutex);
4159 }
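
/*
 * Rough layout of one map_array block built above, for a module that
 * exports N enums (indices are illustrative):
 *
 *   [0]      head: { mod, length = N }
 *   [1..N]   map:  a copy of each trace_enum_map from the module
 *   [N+1]    tail: zeroed; tail.next chains to the next block, if any
 *
 * trace_enum_jmp_to_tail() relies on this layout to hop from a block's
 * head to its tail when walking the chain.
 */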
4160
4161 static void trace_create_enum_file(struct dentry *d_tracer)
4162 {
4163         trace_create_file("enum_map", 0444, d_tracer,
4164                           NULL, &tracing_enum_map_fops);
4165 }
4166
4167 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4168 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4169 static inline void trace_insert_enum_map_file(struct module *mod,
4170                               struct trace_enum_map **start, int len) { }
4171 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4172
4173 static void trace_insert_enum_map(struct module *mod,
4174                                   struct trace_enum_map **start, int len)
4175 {
4176         struct trace_enum_map **map;
4177
4178         if (len <= 0)
4179                 return;
4180
4181         map = start;
4182
4183         trace_event_enum_update(map, len);
4184
4185         trace_insert_enum_map_file(mod, start, len);
4186 }
4187
4188 static ssize_t
4189 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4190                        size_t cnt, loff_t *ppos)
4191 {
4192         struct trace_array *tr = filp->private_data;
4193         char buf[MAX_TRACER_SIZE+2];
4194         int r;
4195
4196         mutex_lock(&trace_types_lock);
4197         r = sprintf(buf, "%s\n", tr->current_trace->name);
4198         mutex_unlock(&trace_types_lock);
4199
4200         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4201 }
4202
4203 int tracer_init(struct tracer *t, struct trace_array *tr)
4204 {
4205         tracing_reset_online_cpus(&tr->trace_buffer);
4206         return t->init(tr);
4207 }
4208
4209 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4210 {
4211         int cpu;
4212
4213         for_each_tracing_cpu(cpu)
4214                 per_cpu_ptr(buf->data, cpu)->entries = val;
4215 }
4216
4217 #ifdef CONFIG_TRACER_MAX_TRACE
4218 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4219 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4220                                         struct trace_buffer *size_buf, int cpu_id)
4221 {
4222         int cpu, ret = 0;
4223
4224         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4225                 for_each_tracing_cpu(cpu) {
4226                         ret = ring_buffer_resize(trace_buf->buffer,
4227                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4228                         if (ret < 0)
4229                                 break;
4230                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4231                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4232                 }
4233         } else {
4234                 ret = ring_buffer_resize(trace_buf->buffer,
4235                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4236                 if (ret == 0)
4237                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4238                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4239         }
4240
4241         return ret;
4242 }
4243 #endif /* CONFIG_TRACER_MAX_TRACE */
4244
4245 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4246                                         unsigned long size, int cpu)
4247 {
4248         int ret;
4249
4250         /*
4251          * If kernel or user changes the size of the ring buffer
4252          * we use the size that was given, and we can forget about
4253          * expanding it later.
4254          */
4255         ring_buffer_expanded = true;
4256
4257         /* May be called before buffers are initialized */
4258         if (!tr->trace_buffer.buffer)
4259                 return 0;
4260
4261         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4262         if (ret < 0)
4263                 return ret;
4264
4265 #ifdef CONFIG_TRACER_MAX_TRACE
4266         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4267             !tr->current_trace->use_max_tr)
4268                 goto out;
4269
4270         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4271         if (ret < 0) {
4272                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4273                                                      &tr->trace_buffer, cpu);
4274                 if (r < 0) {
4275                         /*
4276                          * AARGH! We are left with a different
4277                          * sized max buffer!!!!
4278                          * The max buffer is our "snapshot" buffer.
4279                          * When a tracer needs a snapshot (one of the
4280                          * latency tracers), it swaps the max buffer
4281                          * with the saved snapshot. We managed to
4282                          * update the size of the main buffer, but failed to
4283                          * update the size of the max buffer. But when we tried
4284                          * to reset the main buffer to the original size, we
4285                          * failed there too. This is very unlikely to
4286                          * happen, but if it does, warn and kill all
4287                          * tracing.
4288                          */
4289                         WARN_ON(1);
4290                         tracing_disabled = 1;
4291                 }
4292                 return ret;
4293         }
4294
4295         if (cpu == RING_BUFFER_ALL_CPUS)
4296                 set_buffer_entries(&tr->max_buffer, size);
4297         else
4298                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4299
4300  out:
4301 #endif /* CONFIG_TRACER_MAX_TRACE */
4302
4303         if (cpu == RING_BUFFER_ALL_CPUS)
4304                 set_buffer_entries(&tr->trace_buffer, size);
4305         else
4306                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4307
4308         return ret;
4309 }
4310
4311 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4312                                           unsigned long size, int cpu_id)
4313 {
4314         int ret = size;
4315
4316         mutex_lock(&trace_types_lock);
4317
4318         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4319                 /* make sure this cpu is enabled in the mask */
4320                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4321                         ret = -EINVAL;
4322                         goto out;
4323                 }
4324         }
4325
4326         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4327         if (ret < 0)
4328                 ret = -ENOMEM;
4329
4330 out:
4331         mutex_unlock(&trace_types_lock);
4332
4333         return ret;
4334 }
4335
4336
4337 /**
4338  * tracing_update_buffers - used by tracing facility to expand ring buffers
4339  *
4340  * To save memory on systems where tracing is configured in but never
4341  * used, the ring buffers are initially set to a minimum size. Once a
4342  * user starts to use the tracing facility, they need to grow to their
4343  * default size.
4344  *
4345  * This function is to be called when a tracer is about to be used.
4346  */
4347 int tracing_update_buffers(void)
4348 {
4349         int ret = 0;
4350
4351         mutex_lock(&trace_types_lock);
4352         if (!ring_buffer_expanded)
4353                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4354                                                 RING_BUFFER_ALL_CPUS);
4355         mutex_unlock(&trace_types_lock);
4356
4357         return ret;
4358 }
4359
4360 struct trace_option_dentry;
4361
4362 static void
4363 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4364
4365 /*
4366  * Used to clear out the tracer before deletion of an instance.
4367  * Must have trace_types_lock held.
4368  */
4369 static void tracing_set_nop(struct trace_array *tr)
4370 {
4371         if (tr->current_trace == &nop_trace)
4372                 return;
4373
4374         tr->current_trace->enabled--;
4375
4376         if (tr->current_trace->reset)
4377                 tr->current_trace->reset(tr);
4378
4379         tr->current_trace = &nop_trace;
4380 }
4381
4382 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4383 {
4384         /* Only enable if the directory has been created already. */
4385         if (!tr->dir)
4386                 return;
4387
4388         create_trace_option_files(tr, t);
4389 }
4390
4391 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4392 {
4393         struct tracer *t;
4394 #ifdef CONFIG_TRACER_MAX_TRACE
4395         bool had_max_tr;
4396 #endif
4397         int ret = 0;
4398
4399         mutex_lock(&trace_types_lock);
4400
4401         if (!ring_buffer_expanded) {
4402                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4403                                                 RING_BUFFER_ALL_CPUS);
4404                 if (ret < 0)
4405                         goto out;
4406                 ret = 0;
4407         }
4408
4409         for (t = trace_types; t; t = t->next) {
4410                 if (strcmp(t->name, buf) == 0)
4411                         break;
4412         }
4413         if (!t) {
4414                 ret = -EINVAL;
4415                 goto out;
4416         }
4417         if (t == tr->current_trace)
4418                 goto out;
4419
4420         /* Some tracers are only allowed for the top level buffer */
4421         if (!trace_ok_for_array(t, tr)) {
4422                 ret = -EINVAL;
4423                 goto out;
4424         }
4425
4426         /* If trace pipe files are being read, we can't change the tracer */
4427         if (tr->current_trace->ref) {
4428                 ret = -EBUSY;
4429                 goto out;
4430         }
4431
4432         trace_branch_disable();
4433
4434         tr->current_trace->enabled--;
4435
4436         if (tr->current_trace->reset)
4437                 tr->current_trace->reset(tr);
4438
4439         /* Current trace needs to be nop_trace before synchronize_sched */
4440         tr->current_trace = &nop_trace;
4441
4442 #ifdef CONFIG_TRACER_MAX_TRACE
4443         had_max_tr = tr->allocated_snapshot;
4444
4445         if (had_max_tr && !t->use_max_tr) {
4446                 /*
4447                  * We need to make sure that the update_max_tr sees that
4448                  * current_trace changed to nop_trace to keep it from
4449                  * swapping the buffers after we resize it.
4450                  * The update_max_tr is called with interrupts disabled
4451                  * so a synchronize_sched() is sufficient.
4452                  */
4453                 synchronize_sched();
4454                 free_snapshot(tr);
4455         }
4456 #endif
4457
4458 #ifdef CONFIG_TRACER_MAX_TRACE
4459         if (t->use_max_tr && !had_max_tr) {
4460                 ret = alloc_snapshot(tr);
4461                 if (ret < 0)
4462                         goto out;
4463         }
4464 #endif
4465
4466         if (t->init) {
4467                 ret = tracer_init(t, tr);
4468                 if (ret)
4469                         goto out;
4470         }
4471
4472         tr->current_trace = t;
4473         tr->current_trace->enabled++;
4474         trace_branch_enable(tr);
4475  out:
4476         mutex_unlock(&trace_types_lock);
4477
4478         return ret;
4479 }
4480
4481 static ssize_t
4482 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4483                         size_t cnt, loff_t *ppos)
4484 {
4485         struct trace_array *tr = filp->private_data;
4486         char buf[MAX_TRACER_SIZE+1];
4487         int i;
4488         size_t ret;
4489         int err;
4490
4491         ret = cnt;
4492
4493         if (cnt > MAX_TRACER_SIZE)
4494                 cnt = MAX_TRACER_SIZE;
4495
4496         if (copy_from_user(&buf, ubuf, cnt))
4497                 return -EFAULT;
4498
4499         buf[cnt] = 0;
4500
4501         /* strip ending whitespace. */
4502         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4503                 buf[i] = 0;
4504
4505         err = tracing_set_tracer(tr, buf);
4506         if (err)
4507                 return err;
4508
4509         *ppos += ret;
4510
4511         return ret;
4512 }
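
/*
 * Usage sketch (the tracer names are examples; "available_tracers" lists
 * the ones actually built in):
 *
 *   # cat current_tracer              typically "nop" until one is chosen
 *   # echo function > current_tracer  start the function tracer
 *   # echo nop > current_tracer       switch back and stop it
 */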
4513
4514 static ssize_t
4515 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4516                    size_t cnt, loff_t *ppos)
4517 {
4518         char buf[64];
4519         int r;
4520
4521         r = snprintf(buf, sizeof(buf), "%ld\n",
4522                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4523         if (r > sizeof(buf))
4524                 r = sizeof(buf);
4525         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4526 }
4527
4528 static ssize_t
4529 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4530                     size_t cnt, loff_t *ppos)
4531 {
4532         unsigned long val;
4533         int ret;
4534
4535         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4536         if (ret)
4537                 return ret;
4538
4539         *ptr = val * 1000;
4540
4541         return cnt;
4542 }
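
/*
 * Note on units: the files backed by these two helpers ("tracing_thresh"
 * and, with CONFIG_TRACER_MAX_TRACE, the max latency file) are read and
 * written in microseconds, while the variables themselves hold
 * nanoseconds; hence nsecs_to_usecs() on read and "* 1000" on write.
 */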
4543
4544 static ssize_t
4545 tracing_thresh_read(struct file *filp, char __user *ubuf,
4546                     size_t cnt, loff_t *ppos)
4547 {
4548         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4549 }
4550
4551 static ssize_t
4552 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4553                      size_t cnt, loff_t *ppos)
4554 {
4555         struct trace_array *tr = filp->private_data;
4556         int ret;
4557
4558         mutex_lock(&trace_types_lock);
4559         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4560         if (ret < 0)
4561                 goto out;
4562
4563         if (tr->current_trace->update_thresh) {
4564                 ret = tr->current_trace->update_thresh(tr);
4565                 if (ret < 0)
4566                         goto out;
4567         }
4568
4569         ret = cnt;
4570 out:
4571         mutex_unlock(&trace_types_lock);
4572
4573         return ret;
4574 }
4575
4576 #ifdef CONFIG_TRACER_MAX_TRACE
4577
4578 static ssize_t
4579 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4580                      size_t cnt, loff_t *ppos)
4581 {
4582         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4583 }
4584
4585 static ssize_t
4586 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4587                       size_t cnt, loff_t *ppos)
4588 {
4589         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4590 }
4591
4592 #endif
4593
4594 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4595 {
4596         struct trace_array *tr = inode->i_private;
4597         struct trace_iterator *iter;
4598         int ret = 0;
4599
4600         if (tracing_disabled)
4601                 return -ENODEV;
4602
4603         if (trace_array_get(tr) < 0)
4604                 return -ENODEV;
4605
4606         mutex_lock(&trace_types_lock);
4607
4608         /* create a buffer to store the information to pass to userspace */
4609         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4610         if (!iter) {
4611                 ret = -ENOMEM;
4612                 __trace_array_put(tr);
4613                 goto out;
4614         }
4615
4616         trace_seq_init(&iter->seq);
4617         iter->trace = tr->current_trace;
4618
4619         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4620                 ret = -ENOMEM;
4621                 goto fail;
4622         }
4623
4624         /* trace pipe does not show start of buffer */
4625         cpumask_setall(iter->started);
4626
4627         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4628                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4629
4630         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4631         if (trace_clocks[tr->clock_id].in_ns)
4632                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4633
4634         iter->tr = tr;
4635         iter->trace_buffer = &tr->trace_buffer;
4636         iter->cpu_file = tracing_get_cpu(inode);
4637         mutex_init(&iter->mutex);
4638         filp->private_data = iter;
4639
4640         if (iter->trace->pipe_open)
4641                 iter->trace->pipe_open(iter);
4642
4643         nonseekable_open(inode, filp);
4644
4645         tr->current_trace->ref++;
4646 out:
4647         mutex_unlock(&trace_types_lock);
4648         return ret;
4649
4650 fail:
4651         kfree(iter);
4652         __trace_array_put(tr);
4653         mutex_unlock(&trace_types_lock);
4654         return ret;
4655 }
4656
4657 static int tracing_release_pipe(struct inode *inode, struct file *file)
4658 {
4659         struct trace_iterator *iter = file->private_data;
4660         struct trace_array *tr = inode->i_private;
4661
4662         mutex_lock(&trace_types_lock);
4663
4664         tr->current_trace->ref--;
4665
4666         if (iter->trace->pipe_close)
4667                 iter->trace->pipe_close(iter);
4668
4669         mutex_unlock(&trace_types_lock);
4670
4671         free_cpumask_var(iter->started);
4672         mutex_destroy(&iter->mutex);
4673         kfree(iter);
4674
4675         trace_array_put(tr);
4676
4677         return 0;
4678 }
4679
4680 static unsigned int
4681 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4682 {
4683         struct trace_array *tr = iter->tr;
4684
4685         /* Iterators are static; they are either filled or empty */
4686         if (trace_buffer_iter(iter, iter->cpu_file))
4687                 return POLLIN | POLLRDNORM;
4688
4689         if (tr->trace_flags & TRACE_ITER_BLOCK)
4690                 /*
4691                  * Always select as readable when in blocking mode
4692                  */
4693                 return POLLIN | POLLRDNORM;
4694         else
4695                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4696                                              filp, poll_table);
4697 }
4698
4699 static unsigned int
4700 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4701 {
4702         struct trace_iterator *iter = filp->private_data;
4703
4704         return trace_poll(iter, filp, poll_table);
4705 }
4706
4707 /* Must be called with iter->mutex held. */
4708 static int tracing_wait_pipe(struct file *filp)
4709 {
4710         struct trace_iterator *iter = filp->private_data;
4711         int ret;
4712
4713         while (trace_empty(iter)) {
4714
4715                 if ((filp->f_flags & O_NONBLOCK)) {
4716                         return -EAGAIN;
4717                 }
4718
4719                 /*
4720                  * We only give an EOF once tracing has been disabled and
4721                  * we have already read something. If tracing is disabled
4722                  * but we have never read anything, we keep blocking; this
4723                  * allows a user to cat this file and then enable tracing.
4724                  * Once something has been read, a disabled trace means EOF.
4725                  *
4726                  * iter->pos will be 0 if we haven't read anything.
4727                  */
4728                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
4729                         break;
4730
4731                 mutex_unlock(&iter->mutex);
4732
4733                 ret = wait_on_pipe(iter, false);
4734
4735                 mutex_lock(&iter->mutex);
4736
4737                 if (ret)
4738                         return ret;
4739         }
4740
4741         return 1;
4742 }
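
/*
 * Practical effect (illustrative): "cat trace_pipe" blocks while the
 * buffer is empty instead of returning EOF, unless the file was opened
 * with O_NONBLOCK, in which case the read fails with -EAGAIN as above.
 */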
4743
4744 /*
4745  * Consumer reader.
4746  */
4747 static ssize_t
4748 tracing_read_pipe(struct file *filp, char __user *ubuf,
4749                   size_t cnt, loff_t *ppos)
4750 {
4751         struct trace_iterator *iter = filp->private_data;
4752         ssize_t sret;
4753
4754         /*
4755          * Avoid more than one consumer on a single file descriptor.
4756          * This is just a matter of trace coherency; the ring buffer itself
4757          * is protected.
4758          */
4759         mutex_lock(&iter->mutex);
4760
4761         /* return any leftover data */
4762         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4763         if (sret != -EBUSY)
4764                 goto out;
4765
4766         trace_seq_init(&iter->seq);
4767
4768         if (iter->trace->read) {
4769                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4770                 if (sret)
4771                         goto out;
4772         }
4773
4774 waitagain:
4775         sret = tracing_wait_pipe(filp);
4776         if (sret <= 0)
4777                 goto out;
4778
4779         /* stop when tracing is finished */
4780         if (trace_empty(iter)) {
4781                 sret = 0;
4782                 goto out;
4783         }
4784
4785         if (cnt >= PAGE_SIZE)
4786                 cnt = PAGE_SIZE - 1;
4787
4788         /* reset all but tr, trace, and overruns */
4789         memset(&iter->seq, 0,
4790                sizeof(struct trace_iterator) -
4791                offsetof(struct trace_iterator, seq));
4792         cpumask_clear(iter->started);
4793         trace_seq_init(&iter->seq);
4794         iter->pos = -1;
4795
4796         trace_event_read_lock();
4797         trace_access_lock(iter->cpu_file);
4798         while (trace_find_next_entry_inc(iter) != NULL) {
4799                 enum print_line_t ret;
4800                 int save_len = iter->seq.seq.len;
4801
4802                 ret = print_trace_line(iter);
4803                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4804                         /* don't print partial lines */
4805                         iter->seq.seq.len = save_len;
4806                         break;
4807                 }
4808                 if (ret != TRACE_TYPE_NO_CONSUME)
4809                         trace_consume(iter);
4810
4811                 if (trace_seq_used(&iter->seq) >= cnt)
4812                         break;
4813
4814                 /*
4815                  * Setting the full flag means we reached the trace_seq buffer
4816                  * size and should have left via the partial output condition
4817                  * above: one of the trace_seq_* functions is not being used properly.
4818                  */
4819                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4820                           iter->ent->type);
4821         }
4822         trace_access_unlock(iter->cpu_file);
4823         trace_event_read_unlock();
4824
4825         /* Now copy what we have to the user */
4826         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4827         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4828                 trace_seq_init(&iter->seq);
4829
4830         /*
4831          * If there was nothing to send to user, in spite of consuming trace
4832          * entries, go back to wait for more entries.
4833          */
4834         if (sret == -EBUSY)
4835                 goto waitagain;
4836
4837 out:
4838         mutex_unlock(&iter->mutex);
4839
4840         return sret;
4841 }
4842
4843 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4844                                      unsigned int idx)
4845 {
4846         __free_page(spd->pages[idx]);
4847 }
4848
4849 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4850         .can_merge              = 0,
4851         .confirm                = generic_pipe_buf_confirm,
4852         .release                = generic_pipe_buf_release,
4853         .steal                  = generic_pipe_buf_steal,
4854         .get                    = generic_pipe_buf_get,
4855 };
4856
4857 static size_t
4858 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4859 {
4860         size_t count;
4861         int save_len;
4862         int ret;
4863
4864         /* Seq buffer is page-sized, exactly what we need. */
4865         for (;;) {
4866                 save_len = iter->seq.seq.len;
4867                 ret = print_trace_line(iter);
4868
4869                 if (trace_seq_has_overflowed(&iter->seq)) {
4870                         iter->seq.seq.len = save_len;
4871                         break;
4872                 }
4873
4874                 /*
4875                  * This should not be hit, because it should only
4876                  * be set if the iter->seq overflowed. But check it
4877                  * anyway to be safe.
4878                  */
4879                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4880                         iter->seq.seq.len = save_len;
4881                         break;
4882                 }
4883
4884                 count = trace_seq_used(&iter->seq) - save_len;
4885                 if (rem < count) {
4886                         rem = 0;
4887                         iter->seq.seq.len = save_len;
4888                         break;
4889                 }
4890
4891                 if (ret != TRACE_TYPE_NO_CONSUME)
4892                         trace_consume(iter);
4893                 rem -= count;
4894                 if (!trace_find_next_entry_inc(iter))   {
4895                         rem = 0;
4896                         iter->ent = NULL;
4897                         break;
4898                 }
4899         }
4900
4901         return rem;
4902 }
4903
4904 static ssize_t tracing_splice_read_pipe(struct file *filp,
4905                                         loff_t *ppos,
4906                                         struct pipe_inode_info *pipe,
4907                                         size_t len,
4908                                         unsigned int flags)
4909 {
4910         struct page *pages_def[PIPE_DEF_BUFFERS];
4911         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4912         struct trace_iterator *iter = filp->private_data;
4913         struct splice_pipe_desc spd = {
4914                 .pages          = pages_def,
4915                 .partial        = partial_def,
4916                 .nr_pages       = 0, /* This gets updated below. */
4917                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4918                 .flags          = flags,
4919                 .ops            = &tracing_pipe_buf_ops,
4920                 .spd_release    = tracing_spd_release_pipe,
4921         };
4922         ssize_t ret;
4923         size_t rem;
4924         unsigned int i;
4925
4926         if (splice_grow_spd(pipe, &spd))
4927                 return -ENOMEM;
4928
4929         mutex_lock(&iter->mutex);
4930
4931         if (iter->trace->splice_read) {
4932                 ret = iter->trace->splice_read(iter, filp,
4933                                                ppos, pipe, len, flags);
4934                 if (ret)
4935                         goto out_err;
4936         }
4937
4938         ret = tracing_wait_pipe(filp);
4939         if (ret <= 0)
4940                 goto out_err;
4941
4942         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4943                 ret = -EFAULT;
4944                 goto out_err;
4945         }
4946
4947         trace_event_read_lock();
4948         trace_access_lock(iter->cpu_file);
4949
4950         /* Fill as many pages as possible. */
4951         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4952                 spd.pages[i] = alloc_page(GFP_KERNEL);
4953                 if (!spd.pages[i])
4954                         break;
4955
4956                 rem = tracing_fill_pipe_page(rem, iter);
4957
4958                 /* Copy the data into the page, so we can start over. */
4959                 ret = trace_seq_to_buffer(&iter->seq,
4960                                           page_address(spd.pages[i]),
4961                                           trace_seq_used(&iter->seq));
4962                 if (ret < 0) {
4963                         __free_page(spd.pages[i]);
4964                         break;
4965                 }
4966                 spd.partial[i].offset = 0;
4967                 spd.partial[i].len = trace_seq_used(&iter->seq);
4968
4969                 trace_seq_init(&iter->seq);
4970         }
4971
4972         trace_access_unlock(iter->cpu_file);
4973         trace_event_read_unlock();
4974         mutex_unlock(&iter->mutex);
4975
4976         spd.nr_pages = i;
4977
4978         if (i)
4979                 ret = splice_to_pipe(pipe, &spd);
4980         else
4981                 ret = 0;
4982 out:
4983         splice_shrink_spd(&spd);
4984         return ret;
4985
4986 out_err:
4987         mutex_unlock(&iter->mutex);
4988         goto out;
4989 }
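
/*
 * tracing_splice_read_pipe() is what backs splice(2) on trace_pipe.  A
 * minimal userspace sketch of draining the rendered trace without an
 * extra copy (illustrative only; the tracefs mount point and the output
 * path are assumptions):
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int trace = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *		int out = open("/tmp/trace.txt", O_WRONLY | O_CREAT, 0644);
 *		int pfd[2];
 *		ssize_t n;
 *
 *		pipe(pfd);
 *		while ((n = splice(trace, NULL, pfd[1], NULL, 65536, 0)) > 0)
 *			splice(pfd[0], NULL, out, NULL, n, 0);
 *		return 0;
 *	}
 */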
4990
4991 static ssize_t
4992 tracing_entries_read(struct file *filp, char __user *ubuf,
4993                      size_t cnt, loff_t *ppos)
4994 {
4995         struct inode *inode = file_inode(filp);
4996         struct trace_array *tr = inode->i_private;
4997         int cpu = tracing_get_cpu(inode);
4998         char buf[64];
4999         int r = 0;
5000         ssize_t ret;
5001
5002         mutex_lock(&trace_types_lock);
5003
5004         if (cpu == RING_BUFFER_ALL_CPUS) {
5005                 int cpu, buf_size_same;
5006                 unsigned long size;
5007
5008                 size = 0;
5009                 buf_size_same = 1;
5010                 /* check if all per-cpu buffer sizes are the same */
5011                 for_each_tracing_cpu(cpu) {
5012                         /* fill in the size from the first enabled cpu */
5013                         if (size == 0)
5014                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5015                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5016                                 buf_size_same = 0;
5017                                 break;
5018                         }
5019                 }
5020
5021                 if (buf_size_same) {
5022                         if (!ring_buffer_expanded)
5023                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5024                                             size >> 10,
5025                                             trace_buf_size >> 10);
5026                         else
5027                                 r = sprintf(buf, "%lu\n", size >> 10);
5028                 } else
5029                         r = sprintf(buf, "X\n");
5030         } else
5031                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5032
5033         mutex_unlock(&trace_types_lock);
5034
5035         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5036         return ret;
5037 }
5038
5039 static ssize_t
5040 tracing_entries_write(struct file *filp, const char __user *ubuf,
5041                       size_t cnt, loff_t *ppos)
5042 {
5043         struct inode *inode = file_inode(filp);
5044         struct trace_array *tr = inode->i_private;
5045         unsigned long val;
5046         int ret;
5047
5048         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5049         if (ret)
5050                 return ret;
5051
5052         /* must have at least 1 entry */
5053         if (!val)
5054                 return -EINVAL;
5055
5056         /* value is in KB */
5057         val <<= 10;
5058         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5059         if (ret < 0)
5060                 return ret;
5061
5062         *ppos += cnt;
5063
5064         return cnt;
5065 }
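
/*
 * buffer_size_kb is read and written in kibibytes per CPU.  Illustrative
 * usage (tracefs mount point assumed):
 *
 *   # echo 4096 > buffer_size_kb                resize every CPU buffer to 4 MB
 *   # echo 1024 > per_cpu/cpu0/buffer_size_kb   resize only CPU 0
 *   # cat buffer_size_kb                        prints "X" if per-CPU sizes differ
 *
 * The "(expanded: ...)" suffix in the read path shows the target size
 * while the ring buffer still has its small boot-time allocation.
 */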
5066
5067 static ssize_t
5068 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5069                                 size_t cnt, loff_t *ppos)
5070 {
5071         struct trace_array *tr = filp->private_data;
5072         char buf[64];
5073         int r, cpu;
5074         unsigned long size = 0, expanded_size = 0;
5075
5076         mutex_lock(&trace_types_lock);
5077         for_each_tracing_cpu(cpu) {
5078                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5079                 if (!ring_buffer_expanded)
5080                         expanded_size += trace_buf_size >> 10;
5081         }
5082         if (ring_buffer_expanded)
5083                 r = sprintf(buf, "%lu\n", size);
5084         else
5085                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5086         mutex_unlock(&trace_types_lock);
5087
5088         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5089 }
5090
5091 static ssize_t
5092 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5093                           size_t cnt, loff_t *ppos)
5094 {
5095         /*
5096          * There is no need to read what the user has written; this function
5097          * only exists so that using "echo" on this file does not return an error.
5098          */
5099
5100         *ppos += cnt;
5101
5102         return cnt;
5103 }
5104
5105 static int
5106 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5107 {
5108         struct trace_array *tr = inode->i_private;
5109
5110         /* disable tracing ? */
5111         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5112                 tracer_tracing_off(tr);
5113         /* resize the ring buffer to 0 */
5114         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5115
5116         trace_array_put(tr);
5117
5118         return 0;
5119 }
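
/*
 * Illustrative use of free_buffer (tracefs mount point assumed): any
 * write is accepted, and the ring buffer is only shrunk to zero when
 * the file is closed, optionally stopping tracing first:
 *
 *   # echo 1 > options/disable_on_free
 *   # echo > free_buffer        on close: tracing stops, buffer memory is freed
 */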
5120
5121 static ssize_t
5122 tracing_mark_write(struct file *filp, const char __user *ubuf,
5123                                         size_t cnt, loff_t *fpos)
5124 {
5125         unsigned long addr = (unsigned long)ubuf;
5126         struct trace_array *tr = filp->private_data;
5127         struct ring_buffer_event *event;
5128         struct ring_buffer *buffer;
5129         struct print_entry *entry;
5130         unsigned long irq_flags;
5131         struct page *pages[2];
5132         void *map_page[2];
5133         int nr_pages = 1;
5134         ssize_t written;
5135         int offset;
5136         int size;
5137         int len;
5138         int ret;
5139         int i;
5140
5141         if (tracing_disabled)
5142                 return -EINVAL;
5143
5144         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5145                 return -EINVAL;
5146
5147         if (cnt > TRACE_BUF_SIZE)
5148                 cnt = TRACE_BUF_SIZE;
5149
5150         /*
5151          * Userspace is injecting traces into the kernel trace buffer.
5152          * We want to be as non-intrusive as possible.
5153          * To do so, we do not want to allocate any special buffers
5154          * or take any locks, but instead write the userspace data
5155          * straight into the ring buffer.
5156          *
5157          * First we need to pin the userspace buffer into memory. It
5158          * is most likely resident already, because userspace just
5159          * referenced it, but there is no guarantee that it is. By
5160          * using get_user_pages_fast() and kmap_atomic()/kunmap_atomic()
5161          * we can access the pages directly. We then write the data
5162          * straight into the ring buffer.
5163          */
5164         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5165
5166         /* check if we cross pages */
5167         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5168                 nr_pages = 2;
5169
5170         offset = addr & (PAGE_SIZE - 1);
5171         addr &= PAGE_MASK;
5172
5173         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5174         if (ret < nr_pages) {
5175                 while (--ret >= 0)
5176                         put_page(pages[ret]);
5177                 written = -EFAULT;
5178                 goto out;
5179         }
5180
5181         for (i = 0; i < nr_pages; i++)
5182                 map_page[i] = kmap_atomic(pages[i]);
5183
5184         local_save_flags(irq_flags);
5185         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5186         buffer = tr->trace_buffer.buffer;
5187         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5188                                           irq_flags, preempt_count());
5189         if (!event) {
5190                 /* Ring buffer disabled, return as if not open for write */
5191                 written = -EBADF;
5192                 goto out_unlock;
5193         }
5194
5195         entry = ring_buffer_event_data(event);
5196         entry->ip = _THIS_IP_;
5197
5198         if (nr_pages == 2) {
5199                 len = PAGE_SIZE - offset;
5200                 memcpy(&entry->buf, map_page[0] + offset, len);
5201                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5202         } else
5203                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5204
5205         if (entry->buf[cnt - 1] != '\n') {
5206                 entry->buf[cnt] = '\n';
5207                 entry->buf[cnt + 1] = '\0';
5208         } else
5209                 entry->buf[cnt] = '\0';
5210
5211         __buffer_unlock_commit(buffer, event);
5212
5213         written = cnt;
5214
5215         *fpos += written;
5216
5217  out_unlock:
5218         for (i = nr_pages - 1; i >= 0; i--) {
5219                 kunmap_atomic(map_page[i]);
5220                 put_page(pages[i]);
5221         }
5222  out:
5223         return written;
5224 }
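
/*
 * trace_marker lets userspace annotate the trace buffer.  Illustrative
 * usage (tracefs mount point assumed; the message text is arbitrary):
 *
 *   # echo "hit checkpoint A" > trace_marker
 *
 * or, from C, keep the file descriptor open and write() to it:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hit checkpoint A", 16);
 *
 * Each write becomes a single TRACE_PRINT event, truncated to
 * TRACE_BUF_SIZE and newline-terminated by the code above.
 */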
5225
5226 static int tracing_clock_show(struct seq_file *m, void *v)
5227 {
5228         struct trace_array *tr = m->private;
5229         int i;
5230
5231         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5232                 seq_printf(m,
5233                         "%s%s%s%s", i ? " " : "",
5234                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5235                         i == tr->clock_id ? "]" : "");
5236         seq_putc(m, '\n');
5237
5238         return 0;
5239 }
5240
5241 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5242 {
5243         int i;
5244
5245         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5246                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5247                         break;
5248         }
5249         if (i == ARRAY_SIZE(trace_clocks))
5250                 return -EINVAL;
5251
5252         mutex_lock(&trace_types_lock);
5253
5254         tr->clock_id = i;
5255
5256         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5257
5258         /*
5259          * New clock may not be consistent with the previous clock.
5260          * Reset the buffer so that it doesn't have incomparable timestamps.
5261          */
5262         tracing_reset_online_cpus(&tr->trace_buffer);
5263
5264 #ifdef CONFIG_TRACER_MAX_TRACE
5265         if (tr->max_buffer.buffer)
5266                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5267         tracing_reset_online_cpus(&tr->max_buffer);
5268 #endif
5269
5270         mutex_unlock(&trace_types_lock);
5271
5272         return 0;
5273 }
5274
5275 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5276                                    size_t cnt, loff_t *fpos)
5277 {
5278         struct seq_file *m = filp->private_data;
5279         struct trace_array *tr = m->private;
5280         char buf[64];
5281         const char *clockstr;
5282         int ret;
5283
5284         if (cnt >= sizeof(buf))
5285                 return -EINVAL;
5286
5287         if (copy_from_user(&buf, ubuf, cnt))
5288                 return -EFAULT;
5289
5290         buf[cnt] = 0;
5291
5292         clockstr = strstrip(buf);
5293
5294         ret = tracing_set_clock(tr, clockstr);
5295         if (ret)
5296                 return ret;
5297
5298         *fpos += cnt;
5299
5300         return cnt;
5301 }
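
/*
 * Illustrative trace_clock usage (tracefs mount point assumed; the exact
 * clock list depends on the kernel):
 *
 *   # cat trace_clock
 *   [local] global counter uptime perf mono ...
 *   # echo global > trace_clock
 *
 * The selected clock is shown in brackets, and switching clocks resets
 * the buffers so old and new timestamps are never mixed.
 */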
5302
5303 static int tracing_clock_open(struct inode *inode, struct file *file)
5304 {
5305         struct trace_array *tr = inode->i_private;
5306         int ret;
5307
5308         if (tracing_disabled)
5309                 return -ENODEV;
5310
5311         if (trace_array_get(tr))
5312                 return -ENODEV;
5313
5314         ret = single_open(file, tracing_clock_show, inode->i_private);
5315         if (ret < 0)
5316                 trace_array_put(tr);
5317
5318         return ret;
5319 }
5320
5321 struct ftrace_buffer_info {
5322         struct trace_iterator   iter;
5323         void                    *spare;
5324         unsigned int            read;
5325 };
5326
5327 #ifdef CONFIG_TRACER_SNAPSHOT
5328 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5329 {
5330         struct trace_array *tr = inode->i_private;
5331         struct trace_iterator *iter;
5332         struct seq_file *m;
5333         int ret = 0;
5334
5335         if (trace_array_get(tr) < 0)
5336                 return -ENODEV;
5337
5338         if (file->f_mode & FMODE_READ) {
5339                 iter = __tracing_open(inode, file, true);
5340                 if (IS_ERR(iter))
5341                         ret = PTR_ERR(iter);
5342         } else {
5343                 /* Writes still need the seq_file to hold the private data */
5344                 ret = -ENOMEM;
5345                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5346                 if (!m)
5347                         goto out;
5348                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5349                 if (!iter) {
5350                         kfree(m);
5351                         goto out;
5352                 }
5353                 ret = 0;
5354
5355                 iter->tr = tr;
5356                 iter->trace_buffer = &tr->max_buffer;
5357                 iter->cpu_file = tracing_get_cpu(inode);
5358                 m->private = iter;
5359                 file->private_data = m;
5360         }
5361 out:
5362         if (ret < 0)
5363                 trace_array_put(tr);
5364
5365         return ret;
5366 }
5367
5368 static ssize_t
5369 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5370                        loff_t *ppos)
5371 {
5372         struct seq_file *m = filp->private_data;
5373         struct trace_iterator *iter = m->private;
5374         struct trace_array *tr = iter->tr;
5375         unsigned long val;
5376         int ret;
5377
5378         ret = tracing_update_buffers();
5379         if (ret < 0)
5380                 return ret;
5381
5382         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5383         if (ret)
5384                 return ret;
5385
5386         mutex_lock(&trace_types_lock);
5387
5388         if (tr->current_trace->use_max_tr) {
5389                 ret = -EBUSY;
5390                 goto out;
5391         }
5392
5393         switch (val) {
5394         case 0:
5395                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5396                         ret = -EINVAL;
5397                         break;
5398                 }
5399                 if (tr->allocated_snapshot)
5400                         free_snapshot(tr);
5401                 break;
5402         case 1:
5403 /* Only allow per-cpu swap if the ring buffer supports it */
5404 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5405                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5406                         ret = -EINVAL;
5407                         break;
5408                 }
5409 #endif
5410                 if (tr->allocated_snapshot)
5411                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5412                                 &tr->trace_buffer, iter->cpu_file);
5413                 else
5414                         ret = alloc_snapshot(tr);
5415
5416                 if (ret < 0)
5417                         break;
5418
5419                 local_irq_disable();
5420                 /* Now, we're going to swap */
5421                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5422                         update_max_tr(tr, current, smp_processor_id());
5423                 else
5424                         update_max_tr_single(tr, current, iter->cpu_file);
5425                 local_irq_enable();
5426                 break;
5427         default:
5428                 if (tr->allocated_snapshot) {
5429                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5430                                 tracing_reset_online_cpus(&tr->max_buffer);
5431                         else
5432                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5433                 }
5434                 break;
5435         }
5436
5437         if (ret >= 0) {
5438                 *ppos += cnt;
5439                 ret = cnt;
5440         }
5441 out:
5442         mutex_unlock(&trace_types_lock);
5443         return ret;
5444 }
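
/*
 * Illustrative use of the snapshot file handled above (tracefs mount
 * point assumed):
 *
 *   # echo 1 > snapshot      allocate the spare buffer if needed and swap it in
 *   # cat snapshot           read the snapshotted trace
 *   # echo 2 > snapshot      clear the snapshot buffer (any value above 1)
 *   # echo 0 > snapshot      free the spare buffer again
 */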
5445
5446 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5447 {
5448         struct seq_file *m = file->private_data;
5449         int ret;
5450
5451         ret = tracing_release(inode, file);
5452
5453         if (file->f_mode & FMODE_READ)
5454                 return ret;
5455
5456         /* If write only, the seq_file is just a stub */
5457         if (m)
5458                 kfree(m->private);
5459         kfree(m);
5460
5461         return 0;
5462 }
5463
5464 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5465 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5466                                     size_t count, loff_t *ppos);
5467 static int tracing_buffers_release(struct inode *inode, struct file *file);
5468 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5469                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5470
5471 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5472 {
5473         struct ftrace_buffer_info *info;
5474         int ret;
5475
5476         ret = tracing_buffers_open(inode, filp);
5477         if (ret < 0)
5478                 return ret;
5479
5480         info = filp->private_data;
5481
5482         if (info->iter.trace->use_max_tr) {
5483                 tracing_buffers_release(inode, filp);
5484                 return -EBUSY;
5485         }
5486
5487         info->iter.snapshot = true;
5488         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5489
5490         return ret;
5491 }
5492
5493 #endif /* CONFIG_TRACER_SNAPSHOT */
5494
5495
5496 static const struct file_operations tracing_thresh_fops = {
5497         .open           = tracing_open_generic,
5498         .read           = tracing_thresh_read,
5499         .write          = tracing_thresh_write,
5500         .llseek         = generic_file_llseek,
5501 };
5502
5503 #ifdef CONFIG_TRACER_MAX_TRACE
5504 static const struct file_operations tracing_max_lat_fops = {
5505         .open           = tracing_open_generic,
5506         .read           = tracing_max_lat_read,
5507         .write          = tracing_max_lat_write,
5508         .llseek         = generic_file_llseek,
5509 };
5510 #endif
5511
5512 static const struct file_operations set_tracer_fops = {
5513         .open           = tracing_open_generic,
5514         .read           = tracing_set_trace_read,
5515         .write          = tracing_set_trace_write,
5516         .llseek         = generic_file_llseek,
5517 };
5518
5519 static const struct file_operations tracing_pipe_fops = {
5520         .open           = tracing_open_pipe,
5521         .poll           = tracing_poll_pipe,
5522         .read           = tracing_read_pipe,
5523         .splice_read    = tracing_splice_read_pipe,
5524         .release        = tracing_release_pipe,
5525         .llseek         = no_llseek,
5526 };
5527
5528 static const struct file_operations tracing_entries_fops = {
5529         .open           = tracing_open_generic_tr,
5530         .read           = tracing_entries_read,
5531         .write          = tracing_entries_write,
5532         .llseek         = generic_file_llseek,
5533         .release        = tracing_release_generic_tr,
5534 };
5535
5536 static const struct file_operations tracing_total_entries_fops = {
5537         .open           = tracing_open_generic_tr,
5538         .read           = tracing_total_entries_read,
5539         .llseek         = generic_file_llseek,
5540         .release        = tracing_release_generic_tr,
5541 };
5542
5543 static const struct file_operations tracing_free_buffer_fops = {
5544         .open           = tracing_open_generic_tr,
5545         .write          = tracing_free_buffer_write,
5546         .release        = tracing_free_buffer_release,
5547 };
5548
5549 static const struct file_operations tracing_mark_fops = {
5550         .open           = tracing_open_generic_tr,
5551         .write          = tracing_mark_write,
5552         .llseek         = generic_file_llseek,
5553         .release        = tracing_release_generic_tr,
5554 };
5555
5556 static const struct file_operations trace_clock_fops = {
5557         .open           = tracing_clock_open,
5558         .read           = seq_read,
5559         .llseek         = seq_lseek,
5560         .release        = tracing_single_release_tr,
5561         .write          = tracing_clock_write,
5562 };
5563
5564 #ifdef CONFIG_TRACER_SNAPSHOT
5565 static const struct file_operations snapshot_fops = {
5566         .open           = tracing_snapshot_open,
5567         .read           = seq_read,
5568         .write          = tracing_snapshot_write,
5569         .llseek         = tracing_lseek,
5570         .release        = tracing_snapshot_release,
5571 };
5572
5573 static const struct file_operations snapshot_raw_fops = {
5574         .open           = snapshot_raw_open,
5575         .read           = tracing_buffers_read,
5576         .release        = tracing_buffers_release,
5577         .splice_read    = tracing_buffers_splice_read,
5578         .llseek         = no_llseek,
5579 };
5580
5581 #endif /* CONFIG_TRACER_SNAPSHOT */
5582
5583 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5584 {
5585         struct trace_array *tr = inode->i_private;
5586         struct ftrace_buffer_info *info;
5587         int ret;
5588
5589         if (tracing_disabled)
5590                 return -ENODEV;
5591
5592         if (trace_array_get(tr) < 0)
5593                 return -ENODEV;
5594
5595         info = kzalloc(sizeof(*info), GFP_KERNEL);
5596         if (!info) {
5597                 trace_array_put(tr);
5598                 return -ENOMEM;
5599         }
5600
5601         mutex_lock(&trace_types_lock);
5602
5603         info->iter.tr           = tr;
5604         info->iter.cpu_file     = tracing_get_cpu(inode);
5605         info->iter.trace        = tr->current_trace;
5606         info->iter.trace_buffer = &tr->trace_buffer;
5607         info->spare             = NULL;
5608         /* Force reading ring buffer for first read */
5609         info->read              = (unsigned int)-1;
5610
5611         filp->private_data = info;
5612
5613         tr->current_trace->ref++;
5614
5615         mutex_unlock(&trace_types_lock);
5616
5617         ret = nonseekable_open(inode, filp);
5618         if (ret < 0)
5619                 trace_array_put(tr);
5620
5621         return ret;
5622 }
5623
5624 static unsigned int
5625 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5626 {
5627         struct ftrace_buffer_info *info = filp->private_data;
5628         struct trace_iterator *iter = &info->iter;
5629
5630         return trace_poll(iter, filp, poll_table);
5631 }
5632
5633 static ssize_t
5634 tracing_buffers_read(struct file *filp, char __user *ubuf,
5635                      size_t count, loff_t *ppos)
5636 {
5637         struct ftrace_buffer_info *info = filp->private_data;
5638         struct trace_iterator *iter = &info->iter;
5639         ssize_t ret;
5640         ssize_t size;
5641
5642         if (!count)
5643                 return 0;
5644
5645 #ifdef CONFIG_TRACER_MAX_TRACE
5646         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5647                 return -EBUSY;
5648 #endif
5649
5650         if (!info->spare)
5651                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5652                                                           iter->cpu_file);
5653         if (!info->spare)
5654                 return -ENOMEM;
5655
5656         /* Do we have previous read data to read? */
5657         if (info->read < PAGE_SIZE)
5658                 goto read;
5659
5660  again:
5661         trace_access_lock(iter->cpu_file);
5662         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5663                                     &info->spare,
5664                                     count,
5665                                     iter->cpu_file, 0);
5666         trace_access_unlock(iter->cpu_file);
5667
5668         if (ret < 0) {
5669                 if (trace_empty(iter)) {
5670                         if ((filp->f_flags & O_NONBLOCK))
5671                                 return -EAGAIN;
5672
5673                         ret = wait_on_pipe(iter, false);
5674                         if (ret)
5675                                 return ret;
5676
5677                         goto again;
5678                 }
5679                 return 0;
5680         }
5681
5682         info->read = 0;
5683  read:
5684         size = PAGE_SIZE - info->read;
5685         if (size > count)
5686                 size = count;
5687
5688         ret = copy_to_user(ubuf, info->spare + info->read, size);
5689         if (ret == size)
5690                 return -EFAULT;
5691
5692         size -= ret;
5693
5694         *ppos += size;
5695         info->read += size;
5696
5697         return size;
5698 }
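
/*
 * trace_pipe_raw hands out whole ring-buffer pages in the binary format
 * produced by ring_buffer_read_page().  Illustrative usage (tracefs
 * mount point and output file are assumptions):
 *
 *   # dd if=per_cpu/cpu0/trace_pipe_raw of=/tmp/cpu0.raw bs=4096
 *
 * Reads smaller than a page are satisfied from the cached page in
 * info->spare on subsequent calls.
 */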
5699
5700 static int tracing_buffers_release(struct inode *inode, struct file *file)
5701 {
5702         struct ftrace_buffer_info *info = file->private_data;
5703         struct trace_iterator *iter = &info->iter;
5704
5705         mutex_lock(&trace_types_lock);
5706
5707         iter->tr->current_trace->ref--;
5708
5709         __trace_array_put(iter->tr);
5710
5711         if (info->spare)
5712                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5713         kfree(info);
5714
5715         mutex_unlock(&trace_types_lock);
5716
5717         return 0;
5718 }
5719
5720 struct buffer_ref {
5721         struct ring_buffer      *buffer;
5722         void                    *page;
5723         int                     ref;
5724 };
5725
5726 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5727                                     struct pipe_buffer *buf)
5728 {
5729         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5730
5731         if (--ref->ref)
5732                 return;
5733
5734         ring_buffer_free_read_page(ref->buffer, ref->page);
5735         kfree(ref);
5736         buf->private = 0;
5737 }
5738
5739 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5740                                 struct pipe_buffer *buf)
5741 {
5742         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5743
5744         if (ref->ref > INT_MAX/2)
5745                 return false;
5746
5747         ref->ref++;
5748         return true;
5749 }
5750
5751 /* Pipe buffer operations for a buffer. */
5752 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5753         .can_merge              = 0,
5754         .confirm                = generic_pipe_buf_confirm,
5755         .release                = buffer_pipe_buf_release,
5756         .steal                  = generic_pipe_buf_steal,
5757         .get                    = buffer_pipe_buf_get,
5758 };
5759
5760 /*
5761  * Callback from splice_to_pipe(): releases any pages remaining in the
5762  * spd in case we errored out while filling the pipe.
5763  */
5764 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5765 {
5766         struct buffer_ref *ref =
5767                 (struct buffer_ref *)spd->partial[i].private;
5768
5769         if (--ref->ref)
5770                 return;
5771
5772         ring_buffer_free_read_page(ref->buffer, ref->page);
5773         kfree(ref);
5774         spd->partial[i].private = 0;
5775 }
5776
5777 static ssize_t
5778 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5779                             struct pipe_inode_info *pipe, size_t len,
5780                             unsigned int flags)
5781 {
5782         struct ftrace_buffer_info *info = file->private_data;
5783         struct trace_iterator *iter = &info->iter;
5784         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5785         struct page *pages_def[PIPE_DEF_BUFFERS];
5786         struct splice_pipe_desc spd = {
5787                 .pages          = pages_def,
5788                 .partial        = partial_def,
5789                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5790                 .flags          = flags,
5791                 .ops            = &buffer_pipe_buf_ops,
5792                 .spd_release    = buffer_spd_release,
5793         };
5794         struct buffer_ref *ref;
5795         int entries, i;
5796         ssize_t ret = 0;
5797
5798 #ifdef CONFIG_TRACER_MAX_TRACE
5799         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5800                 return -EBUSY;
5801 #endif
5802
5803         if (*ppos & (PAGE_SIZE - 1))
5804                 return -EINVAL;
5805
5806         if (len & (PAGE_SIZE - 1)) {
5807                 if (len < PAGE_SIZE)
5808                         return -EINVAL;
5809                 len &= PAGE_MASK;
5810         }
5811
5812         if (splice_grow_spd(pipe, &spd))
5813                 return -ENOMEM;
5814
5815  again:
5816         trace_access_lock(iter->cpu_file);
5817         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5818
5819         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5820                 struct page *page;
5821                 int r;
5822
5823                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5824                 if (!ref) {
5825                         ret = -ENOMEM;
5826                         break;
5827                 }
5828
5829                 ref->ref = 1;
5830                 ref->buffer = iter->trace_buffer->buffer;
5831                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5832                 if (!ref->page) {
5833                         ret = -ENOMEM;
5834                         kfree(ref);
5835                         break;
5836                 }
5837
5838                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5839                                           len, iter->cpu_file, 1);
5840                 if (r < 0) {
5841                         ring_buffer_free_read_page(ref->buffer, ref->page);
5842                         kfree(ref);
5843                         break;
5844                 }
5845
5846                 page = virt_to_page(ref->page);
5847
5848                 spd.pages[i] = page;
5849                 spd.partial[i].len = PAGE_SIZE;
5850                 spd.partial[i].offset = 0;
5851                 spd.partial[i].private = (unsigned long)ref;
5852                 spd.nr_pages++;
5853                 *ppos += PAGE_SIZE;
5854
5855                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5856         }
5857
5858         trace_access_unlock(iter->cpu_file);
5859         spd.nr_pages = i;
5860
5861         /* did we read anything? */
5862         if (!spd.nr_pages) {
5863                 if (ret)
5864                         goto out;
5865
5866                 ret = -EAGAIN;
5867                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5868                         goto out;
5869
5870                 ret = wait_on_pipe(iter, true);
5871                 if (ret)
5872                         goto out;
5873
5874                 goto again;
5875         }
5876
5877         ret = splice_to_pipe(pipe, &spd);
5878 out:
5879         splice_shrink_spd(&spd);
5880
5881         return ret;
5882 }
5883
5884 static const struct file_operations tracing_buffers_fops = {
5885         .open           = tracing_buffers_open,
5886         .read           = tracing_buffers_read,
5887         .poll           = tracing_buffers_poll,
5888         .release        = tracing_buffers_release,
5889         .splice_read    = tracing_buffers_splice_read,
5890         .llseek         = no_llseek,
5891 };
5892
5893 static ssize_t
5894 tracing_stats_read(struct file *filp, char __user *ubuf,
5895                    size_t count, loff_t *ppos)
5896 {
5897         struct inode *inode = file_inode(filp);
5898         struct trace_array *tr = inode->i_private;
5899         struct trace_buffer *trace_buf = &tr->trace_buffer;
5900         int cpu = tracing_get_cpu(inode);
5901         struct trace_seq *s;
5902         unsigned long cnt;
5903         unsigned long long t;
5904         unsigned long usec_rem;
5905
5906         s = kmalloc(sizeof(*s), GFP_KERNEL);
5907         if (!s)
5908                 return -ENOMEM;
5909
5910         trace_seq_init(s);
5911
5912         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5913         trace_seq_printf(s, "entries: %ld\n", cnt);
5914
5915         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5916         trace_seq_printf(s, "overrun: %ld\n", cnt);
5917
5918         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5919         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5920
5921         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5922         trace_seq_printf(s, "bytes: %ld\n", cnt);
5923
5924         if (trace_clocks[tr->clock_id].in_ns) {
5925                 /* local or global for trace_clock */
5926                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5927                 usec_rem = do_div(t, USEC_PER_SEC);
5928                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5929                                                                 t, usec_rem);
5930
5931                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5932                 usec_rem = do_div(t, USEC_PER_SEC);
5933                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5934         } else {
5935                 /* counter or tsc mode for trace_clock */
5936                 trace_seq_printf(s, "oldest event ts: %llu\n",
5937                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5938
5939                 trace_seq_printf(s, "now ts: %llu\n",
5940                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5941         }
5942
5943         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5944         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5945
5946         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5947         trace_seq_printf(s, "read events: %ld\n", cnt);
5948
5949         count = simple_read_from_buffer(ubuf, count, ppos,
5950                                         s->buffer, trace_seq_used(s));
5951
5952         kfree(s);
5953
5954         return count;
5955 }
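
/*
 * Illustrative per_cpu/cpu0/stats output produced by the function above
 * (values are made up; with a non-nanosecond clock the two "ts" lines
 * print raw counter values instead):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts:  2153.350721
 *   now ts:  2154.120901
 *   dropped events: 0
 *   read events: 1024
 */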
5956
5957 static const struct file_operations tracing_stats_fops = {
5958         .open           = tracing_open_generic_tr,
5959         .read           = tracing_stats_read,
5960         .llseek         = generic_file_llseek,
5961         .release        = tracing_release_generic_tr,
5962 };
5963
5964 #ifdef CONFIG_DYNAMIC_FTRACE
5965
5966 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5967 {
5968         return 0;
5969 }
5970
5971 static ssize_t
5972 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5973                   size_t cnt, loff_t *ppos)
5974 {
5975         static char ftrace_dyn_info_buffer[1024];
5976         static DEFINE_MUTEX(dyn_info_mutex);
5977         unsigned long *p = filp->private_data;
5978         char *buf = ftrace_dyn_info_buffer;
5979         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5980         int r;
5981
5982         mutex_lock(&dyn_info_mutex);
5983         r = sprintf(buf, "%ld ", *p);
5984
5985         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5986         buf[r++] = '\n';
5987
5988         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5989
5990         mutex_unlock(&dyn_info_mutex);
5991
5992         return r;
5993 }
5994
5995 static const struct file_operations tracing_dyn_info_fops = {
5996         .open           = tracing_open_generic,
5997         .read           = tracing_read_dyn_info,
5998         .llseek         = generic_file_llseek,
5999 };
6000 #endif /* CONFIG_DYNAMIC_FTRACE */
6001
6002 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6003 static void
6004 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6005 {
6006         tracing_snapshot();
6007 }
6008
6009 static void
6010 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6011 {
6012         unsigned long *count = (long *)data;
6013
6014         if (!*count)
6015                 return;
6016
6017         if (*count != -1)
6018                 (*count)--;
6019
6020         tracing_snapshot();
6021 }
6022
6023 static int
6024 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6025                       struct ftrace_probe_ops *ops, void *data)
6026 {
6027         long count = (long)data;
6028
6029         seq_printf(m, "%ps:", (void *)ip);
6030
6031         seq_puts(m, "snapshot");
6032
6033         if (count == -1)
6034                 seq_puts(m, ":unlimited\n");
6035         else
6036                 seq_printf(m, ":count=%ld\n", count);
6037
6038         return 0;
6039 }
6040
6041 static struct ftrace_probe_ops snapshot_probe_ops = {
6042         .func                   = ftrace_snapshot,
6043         .print                  = ftrace_snapshot_print,
6044 };
6045
6046 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6047         .func                   = ftrace_count_snapshot,
6048         .print                  = ftrace_snapshot_print,
6049 };
6050
6051 static int
6052 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6053                                char *glob, char *cmd, char *param, int enable)
6054 {
6055         struct ftrace_probe_ops *ops;
6056         void *count = (void *)-1;
6057         char *number;
6058         int ret;
6059
6060         /* hash funcs only work with set_ftrace_filter */
6061         if (!enable)
6062                 return -EINVAL;
6063
6064         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6065
6066         if (glob[0] == '!') {
6067                 unregister_ftrace_function_probe_func(glob+1, ops);
6068                 return 0;
6069         }
6070
6071         if (!param)
6072                 goto out_reg;
6073
6074         number = strsep(&param, ":");
6075
6076         if (!strlen(number))
6077                 goto out_reg;
6078
6079         /*
6080          * We use the callback data field (which is a pointer)
6081          * as our counter.
6082          */
6083         ret = kstrtoul(number, 0, (unsigned long *)&count);
6084         if (ret)
6085                 return ret;
6086
6087  out_reg:
6088         ret = alloc_snapshot(&global_trace);
6089         if (ret < 0)
6090                 goto out;
6091
6092         ret = register_ftrace_function_probe(glob, ops, count);
6093
6094  out:
6095         return ret < 0 ? ret : 0;
6096 }
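
/*
 * The "snapshot" function command registered below is driven through
 * set_ftrace_filter.  Illustrative usage (tracefs mount point assumed;
 * "schedule" is just an example function):
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter      snapshot on every hit
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter    only the first 5 hits
 *   # echo '!schedule:snapshot' > set_ftrace_filter     remove the probe
 */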
6097
6098 static struct ftrace_func_command ftrace_snapshot_cmd = {
6099         .name                   = "snapshot",
6100         .func                   = ftrace_trace_snapshot_callback,
6101 };
6102
6103 static __init int register_snapshot_cmd(void)
6104 {
6105         return register_ftrace_command(&ftrace_snapshot_cmd);
6106 }
6107 #else
6108 static inline __init int register_snapshot_cmd(void) { return 0; }
6109 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6110
6111 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6112 {
6113         if (WARN_ON(!tr->dir))
6114                 return ERR_PTR(-ENODEV);
6115
6116         /* Top directory uses NULL as the parent */
6117         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6118                 return NULL;
6119
6120         /* All sub buffers have a descriptor */
6121         return tr->dir;
6122 }
6123
6124 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6125 {
6126         struct dentry *d_tracer;
6127
6128         if (tr->percpu_dir)
6129                 return tr->percpu_dir;
6130
6131         d_tracer = tracing_get_dentry(tr);
6132         if (IS_ERR(d_tracer))
6133                 return NULL;
6134
6135         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6136
6137         WARN_ONCE(!tr->percpu_dir,
6138                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6139
6140         return tr->percpu_dir;
6141 }
6142
6143 static struct dentry *
6144 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6145                       void *data, long cpu, const struct file_operations *fops)
6146 {
6147         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6148
6149         if (ret) /* See tracing_get_cpu() */
6150                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6151         return ret;
6152 }
6153
6154 static void
6155 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6156 {
6157         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6158         struct dentry *d_cpu;
6159         char cpu_dir[30]; /* 30 characters should be more than enough */
6160
6161         if (!d_percpu)
6162                 return;
6163
6164         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6165         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6166         if (!d_cpu) {
6167                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6168                 return;
6169         }
6170
6171         /* per cpu trace_pipe */
6172         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6173                                 tr, cpu, &tracing_pipe_fops);
6174
6175         /* per cpu trace */
6176         trace_create_cpu_file("trace", 0644, d_cpu,
6177                                 tr, cpu, &tracing_fops);
6178
6179         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6180                                 tr, cpu, &tracing_buffers_fops);
6181
6182         trace_create_cpu_file("stats", 0444, d_cpu,
6183                                 tr, cpu, &tracing_stats_fops);
6184
6185         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6186                                 tr, cpu, &tracing_entries_fops);
6187
6188 #ifdef CONFIG_TRACER_SNAPSHOT
6189         trace_create_cpu_file("snapshot", 0644, d_cpu,
6190                                 tr, cpu, &snapshot_fops);
6191
6192         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6193                                 tr, cpu, &snapshot_raw_fops);
6194 #endif
6195 }
6196
6197 #ifdef CONFIG_FTRACE_SELFTEST
6198 /* Let selftest have access to static functions in this file */
6199 #include "trace_selftest.c"
6200 #endif
6201
6202 static ssize_t
6203 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6204                         loff_t *ppos)
6205 {
6206         struct trace_option_dentry *topt = filp->private_data;
6207         char *buf;
6208
6209         if (topt->flags->val & topt->opt->bit)
6210                 buf = "1\n";
6211         else
6212                 buf = "0\n";
6213
6214         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6215 }
6216
6217 static ssize_t
6218 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6219                          loff_t *ppos)
6220 {
6221         struct trace_option_dentry *topt = filp->private_data;
6222         unsigned long val;
6223         int ret;
6224
6225         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6226         if (ret)
6227                 return ret;
6228
6229         if (val != 0 && val != 1)
6230                 return -EINVAL;
6231
6232         if (!!(topt->flags->val & topt->opt->bit) != val) {
6233                 mutex_lock(&trace_types_lock);
6234                 ret = __set_tracer_option(topt->tr, topt->flags,
6235                                           topt->opt, !val);
6236                 mutex_unlock(&trace_types_lock);
6237                 if (ret)
6238                         return ret;
6239         }
6240
6241         *ppos += cnt;
6242
6243         return cnt;
6244 }
6245
6246
6247 static const struct file_operations trace_options_fops = {
6248         .open = tracing_open_generic,
6249         .read = trace_options_read,
6250         .write = trace_options_write,
6251         .llseek = generic_file_llseek,
6252 };
6253
6254 /*
6255  * In order to pass in both the trace_array descriptor as well as the index
6256  * of the flag that a trace option file represents, the trace_array has a
6257  * character array trace_flags_index[] whose elements hold the index of the
6258  * bit for the flag they represent: index[0] == 0, index[1] == 1, etc.
6259  * The address of the corresponding element of this array is passed to the
6260  * flag option file read/write callbacks.
6261  *
6262  * To extract both the index and the trace_array descriptor,
6263  * get_tr_index() uses the following algorithm.
6264  *
6265  *   idx = *ptr;
6266  *
6267  * Dereferencing the pointer yields the index, because each element's
6268  * value equals its own index (remember index[1] == 1).
6269  *
6270  * Subtracting that index from ptr then gets us back to the start of
6271  * the array itself:
6272  *
6273  *   ptr - idx == &index[0]
6274  *
6275  * A simple container_of() from that pointer finally gets us to the
6276  * trace_array descriptor.
6277  */
6278 static void get_tr_index(void *data, struct trace_array **ptr,
6279                          unsigned int *pindex)
6280 {
6281         *pindex = *(unsigned char *)data;
6282
6283         *ptr = container_of(data - *pindex, struct trace_array,
6284                             trace_flags_index);
6285 }
6286
6287 static ssize_t
6288 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6289                         loff_t *ppos)
6290 {
6291         void *tr_index = filp->private_data;
6292         struct trace_array *tr;
6293         unsigned int index;
6294         char *buf;
6295
6296         get_tr_index(tr_index, &tr, &index);
6297
6298         if (tr->trace_flags & (1 << index))
6299                 buf = "1\n";
6300         else
6301                 buf = "0\n";
6302
6303         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6304 }
6305
6306 static ssize_t
6307 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6308                          loff_t *ppos)
6309 {
6310         void *tr_index = filp->private_data;
6311         struct trace_array *tr;
6312         unsigned int index;
6313         unsigned long val;
6314         int ret;
6315
6316         get_tr_index(tr_index, &tr, &index);
6317
6318         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6319         if (ret)
6320                 return ret;
6321
6322         if (val != 0 && val != 1)
6323                 return -EINVAL;
6324
6325         mutex_lock(&trace_types_lock);
6326         ret = set_tracer_flag(tr, 1 << index, val);
6327         mutex_unlock(&trace_types_lock);
6328
6329         if (ret < 0)
6330                 return ret;
6331
6332         *ppos += cnt;
6333
6334         return cnt;
6335 }
6336
6337 static const struct file_operations trace_options_core_fops = {
6338         .open = tracing_open_generic,
6339         .read = trace_options_core_read,
6340         .write = trace_options_core_write,
6341         .llseek = generic_file_llseek,
6342 };
6343
6344 struct dentry *trace_create_file(const char *name,
6345                                  umode_t mode,
6346                                  struct dentry *parent,
6347                                  void *data,
6348                                  const struct file_operations *fops)
6349 {
6350         struct dentry *ret;
6351
6352         ret = tracefs_create_file(name, mode, parent, data, fops);
6353         if (!ret)
6354                 pr_warning("Could not create tracefs '%s' entry\n", name);
6355
6356         return ret;
6357 }
6358
6359
6360 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6361 {
6362         struct dentry *d_tracer;
6363
6364         if (tr->options)
6365                 return tr->options;
6366
6367         d_tracer = tracing_get_dentry(tr);
6368         if (IS_ERR(d_tracer))
6369                 return NULL;
6370
6371         tr->options = tracefs_create_dir("options", d_tracer);
6372         if (!tr->options) {
6373                 pr_warning("Could not create tracefs directory 'options'\n");
6374                 return NULL;
6375         }
6376
6377         return tr->options;
6378 }
6379
6380 static void
6381 create_trace_option_file(struct trace_array *tr,
6382                          struct trace_option_dentry *topt,
6383                          struct tracer_flags *flags,
6384                          struct tracer_opt *opt)
6385 {
6386         struct dentry *t_options;
6387
6388         t_options = trace_options_init_dentry(tr);
6389         if (!t_options)
6390                 return;
6391
6392         topt->flags = flags;
6393         topt->opt = opt;
6394         topt->tr = tr;
6395
6396         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6397                                     &trace_options_fops);
6398
6399 }
6400
6401 static void
6402 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6403 {
6404         struct trace_option_dentry *topts;
6405         struct trace_options *tr_topts;
6406         struct tracer_flags *flags;
6407         struct tracer_opt *opts;
6408         int cnt;
6409         int i;
6410
6411         if (!tracer)
6412                 return;
6413
6414         flags = tracer->flags;
6415
6416         if (!flags || !flags->opts)
6417                 return;
6418
6419         /*
6420          * If this is an instance, only create flags for tracers
6421          * the instance may have.
6422          */
6423         if (!trace_ok_for_array(tracer, tr))
6424                 return;
6425
6426         for (i = 0; i < tr->nr_topts; i++) {
6427                 /*
6428                  * Check if these flags have already been added.
6429                  * Some tracers share flags.
6430                  */
6431                 if (tr->topts[i].tracer->flags == tracer->flags)
6432                         return;
6433         }
6434
6435         opts = flags->opts;
6436
6437         for (cnt = 0; opts[cnt].name; cnt++)
6438                 ;
6439
6440         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6441         if (!topts)
6442                 return;
6443
6444         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6445                             GFP_KERNEL);
6446         if (!tr_topts) {
6447                 kfree(topts);
6448                 return;
6449         }
6450
6451         tr->topts = tr_topts;
6452         tr->topts[tr->nr_topts].tracer = tracer;
6453         tr->topts[tr->nr_topts].topts = topts;
6454         tr->nr_topts++;
6455
6456         for (cnt = 0; opts[cnt].name; cnt++) {
6457                 create_trace_option_file(tr, &topts[cnt], flags,
6458                                          &opts[cnt]);
6459                 WARN_ONCE(topts[cnt].entry == NULL,
6460                           "Failed to create trace option: %s",
6461                           opts[cnt].name);
6462         }
6463 }
6464
6465 static struct dentry *
6466 create_trace_option_core_file(struct trace_array *tr,
6467                               const char *option, long index)
6468 {
6469         struct dentry *t_options;
6470
6471         t_options = trace_options_init_dentry(tr);
6472         if (!t_options)
6473                 return NULL;
6474
6475         return trace_create_file(option, 0644, t_options,
6476                                  (void *)&tr->trace_flags_index[index],
6477                                  &trace_options_core_fops);
6478 }
6479
6480 static void create_trace_options_dir(struct trace_array *tr)
6481 {
6482         struct dentry *t_options;
6483         bool top_level = tr == &global_trace;
6484         int i;
6485
6486         t_options = trace_options_init_dentry(tr);
6487         if (!t_options)
6488                 return;
6489
6490         for (i = 0; trace_options[i]; i++) {
6491                 if (top_level ||
6492                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6493                         create_trace_option_core_file(tr, trace_options[i], i);
6494         }
6495 }
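/*
 * Usage sketch (illustrative, not part of the original source): each file
 * created here holds "0" or "1" and toggles one bit of tr->trace_flags
 * through trace_options_core_fops above, e.g. from user space:
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 *	cat /sys/kernel/tracing/options/stacktrace
 */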
6496
6497 static ssize_t
6498 rb_simple_read(struct file *filp, char __user *ubuf,
6499                size_t cnt, loff_t *ppos)
6500 {
6501         struct trace_array *tr = filp->private_data;
6502         char buf[64];
6503         int r;
6504
6505         r = tracer_tracing_is_on(tr);
6506         r = sprintf(buf, "%d\n", r);
6507
6508         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6509 }
6510
6511 static ssize_t
6512 rb_simple_write(struct file *filp, const char __user *ubuf,
6513                 size_t cnt, loff_t *ppos)
6514 {
6515         struct trace_array *tr = filp->private_data;
6516         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6517         unsigned long val;
6518         int ret;
6519
6520         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6521         if (ret)
6522                 return ret;
6523
6524         if (buffer) {
6525                 mutex_lock(&trace_types_lock);
6526                 if (!!val == tracer_tracing_is_on(tr)) {
6527                         val = 0; /* do nothing */
6528                 } else if (val) {
6529                         tracer_tracing_on(tr);
6530                         if (tr->current_trace->start)
6531                                 tr->current_trace->start(tr);
6532                 } else {
6533                         tracer_tracing_off(tr);
6534                         if (tr->current_trace->stop)
6535                                 tr->current_trace->stop(tr);
6536                 }
6537                 mutex_unlock(&trace_types_lock);
6538         }
6539
6540         (*ppos)++;
6541
6542         return cnt;
6543 }
6544
6545 static const struct file_operations rb_simple_fops = {
6546         .open           = tracing_open_generic_tr,
6547         .read           = rb_simple_read,
6548         .write          = rb_simple_write,
6549         .release        = tracing_release_generic_tr,
6550         .llseek         = default_llseek,
6551 };
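/*
 * A usage sketch (illustrative): rb_simple_fops backs the per-instance
 * "tracing_on" file created in init_tracer_tracefs() below, so recording
 * can be paused and resumed without tearing down any buffers, e.g.:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 */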
6552
6553 struct dentry *trace_instance_dir;
6554
6555 static void
6556 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6557
6558 static int
6559 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6560 {
6561         enum ring_buffer_flags rb_flags;
6562
6563         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6564
6565         buf->tr = tr;
6566
6567         buf->buffer = ring_buffer_alloc(size, rb_flags);
6568         if (!buf->buffer)
6569                 return -ENOMEM;
6570
6571         buf->data = alloc_percpu(struct trace_array_cpu);
6572         if (!buf->data) {
6573                 ring_buffer_free(buf->buffer);
6574                 buf->buffer = NULL;
6575                 return -ENOMEM;
6576         }
6577
6578         /* Allocate the first page for all buffers */
6579         set_buffer_entries(&tr->trace_buffer,
6580                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6581
6582         return 0;
6583 }
6584
6585 static int allocate_trace_buffers(struct trace_array *tr, int size)
6586 {
6587         int ret;
6588
6589         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6590         if (ret)
6591                 return ret;
6592
6593 #ifdef CONFIG_TRACER_MAX_TRACE
6594         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6595                                     allocate_snapshot ? size : 1);
6596         if (WARN_ON(ret)) {
6597                 ring_buffer_free(tr->trace_buffer.buffer);
6598                 tr->trace_buffer.buffer = NULL;
6599                 free_percpu(tr->trace_buffer.data);
6600                 tr->trace_buffer.data = NULL;
6601                 return -ENOMEM;
6602         }
6603         tr->allocated_snapshot = allocate_snapshot;
6604
6605         /*
6606          * Only the top level trace array gets its snapshot allocated
6607          * from the kernel command line.
6608          */
6609         allocate_snapshot = false;
6610 #endif
6611
6612         /*
6613          * Because of some magic with the way alloc_percpu() works on
6614          * x86_64, we need to synchronize the pgd of all the tables,
6615          * otherwise trace events that fire in x86_64 page fault
6616          * handlers cannot cope with the chance that alloc_percpu()'d
6617          * memory is touched from within the page fault trace event
6618          * itself. Oh, and we need to audit all other alloc_percpu() and vmalloc()
6619          * calls in tracing, because something might get triggered within a
6620          * page fault trace event!
6621          */
6622         vmalloc_sync_mappings();
6623
6624         return 0;
6625 }
6626
6627 static void free_trace_buffer(struct trace_buffer *buf)
6628 {
6629         if (buf->buffer) {
6630                 ring_buffer_free(buf->buffer);
6631                 buf->buffer = NULL;
6632                 free_percpu(buf->data);
6633                 buf->data = NULL;
6634         }
6635 }
6636
6637 static void free_trace_buffers(struct trace_array *tr)
6638 {
6639         if (!tr)
6640                 return;
6641
6642         free_trace_buffer(&tr->trace_buffer);
6643
6644 #ifdef CONFIG_TRACER_MAX_TRACE
6645         free_trace_buffer(&tr->max_buffer);
6646 #endif
6647 }
6648
6649 static void init_trace_flags_index(struct trace_array *tr)
6650 {
6651         int i;
6652
6653         /* Used by the trace options files */
6654         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6655                 tr->trace_flags_index[i] = i;
6656 }
6657
6658 static void __update_tracer_options(struct trace_array *tr)
6659 {
6660         struct tracer *t;
6661
6662         for (t = trace_types; t; t = t->next)
6663                 add_tracer_options(tr, t);
6664 }
6665
6666 static void update_tracer_options(struct trace_array *tr)
6667 {
6668         mutex_lock(&trace_types_lock);
6669         __update_tracer_options(tr);
6670         mutex_unlock(&trace_types_lock);
6671 }
6672
6673 static int instance_mkdir(const char *name)
6674 {
6675         struct trace_array *tr;
6676         int ret;
6677
6678         mutex_lock(&trace_types_lock);
6679
6680         ret = -EEXIST;
6681         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6682                 if (tr->name && strcmp(tr->name, name) == 0)
6683                         goto out_unlock;
6684         }
6685
6686         ret = -ENOMEM;
6687         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6688         if (!tr)
6689                 goto out_unlock;
6690
6691         tr->name = kstrdup(name, GFP_KERNEL);
6692         if (!tr->name)
6693                 goto out_free_tr;
6694
6695         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6696                 goto out_free_tr;
6697
6698         tr->trace_flags = global_trace.trace_flags;
6699
6700         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6701
6702         raw_spin_lock_init(&tr->start_lock);
6703
6704         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6705
6706         tr->current_trace = &nop_trace;
6707
6708         INIT_LIST_HEAD(&tr->systems);
6709         INIT_LIST_HEAD(&tr->events);
6710
6711         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6712                 goto out_free_tr;
6713
6714         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6715         if (!tr->dir)
6716                 goto out_free_tr;
6717
6718         ret = event_trace_add_tracer(tr->dir, tr);
6719         if (ret) {
6720                 tracefs_remove_recursive(tr->dir);
6721                 goto out_free_tr;
6722         }
6723
6724         init_tracer_tracefs(tr, tr->dir);
6725         init_trace_flags_index(tr);
6726         __update_tracer_options(tr);
6727
6728         list_add(&tr->list, &ftrace_trace_arrays);
6729
6730         mutex_unlock(&trace_types_lock);
6731
6732         return 0;
6733
6734  out_free_tr:
6735         free_trace_buffers(tr);
6736         free_cpumask_var(tr->tracing_cpumask);
6737         kfree(tr->name);
6738         kfree(tr);
6739
6740  out_unlock:
6741         mutex_unlock(&trace_types_lock);
6742
6743         return ret;
6744
6745 }
6746
6747 static int instance_rmdir(const char *name)
6748 {
6749         struct trace_array *tr;
6750         int found = 0;
6751         int ret;
6752         int i;
6753
6754         mutex_lock(&trace_types_lock);
6755
6756         ret = -ENODEV;
6757         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6758                 if (tr->name && strcmp(tr->name, name) == 0) {
6759                         found = 1;
6760                         break;
6761                 }
6762         }
6763         if (!found)
6764                 goto out_unlock;
6765
6766         ret = -EBUSY;
6767         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6768                 goto out_unlock;
6769
6770         list_del(&tr->list);
6771
6772         tracing_set_nop(tr);
6773         event_trace_del_tracer(tr);
6774         ftrace_destroy_function_files(tr);
6775         tracefs_remove_recursive(tr->dir);
6776         free_trace_buffers(tr);
6777
6778         for (i = 0; i < tr->nr_topts; i++) {
6779                 kfree(tr->topts[i].topts);
6780         }
6781         kfree(tr->topts);
6782
6783         free_cpumask_var(tr->tracing_cpumask);
6784         kfree(tr->name);
6785         kfree(tr);
6786
6787         ret = 0;
6788
6789  out_unlock:
6790         mutex_unlock(&trace_types_lock);
6791
6792         return ret;
6793 }
6794
6795 static __init void create_trace_instances(struct dentry *d_tracer)
6796 {
6797         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6798                                                          instance_mkdir,
6799                                                          instance_rmdir);
6800         if (WARN_ON(!trace_instance_dir))
6801                 return;
6802 }
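/*
 * Usage sketch (illustrative, assuming the default tracefs mount point):
 * once the "instances" directory exists, user space creates and removes
 * trace arrays with plain mkdir/rmdir, which reach instance_mkdir() and
 * instance_rmdir() above; "foo" is just an example name:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *	rmdir /sys/kernel/tracing/instances/foo
 */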
6803
6804 static void
6805 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6806 {
6807         int cpu;
6808
6809         trace_create_file("available_tracers", 0444, d_tracer,
6810                         tr, &show_traces_fops);
6811
6812         trace_create_file("current_tracer", 0644, d_tracer,
6813                         tr, &set_tracer_fops);
6814
6815         trace_create_file("tracing_cpumask", 0644, d_tracer,
6816                           tr, &tracing_cpumask_fops);
6817
6818         trace_create_file("trace_options", 0644, d_tracer,
6819                           tr, &tracing_iter_fops);
6820
6821         trace_create_file("trace", 0644, d_tracer,
6822                           tr, &tracing_fops);
6823
6824         trace_create_file("trace_pipe", 0444, d_tracer,
6825                           tr, &tracing_pipe_fops);
6826
6827         trace_create_file("buffer_size_kb", 0644, d_tracer,
6828                           tr, &tracing_entries_fops);
6829
6830         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6831                           tr, &tracing_total_entries_fops);
6832
6833         trace_create_file("free_buffer", 0200, d_tracer,
6834                           tr, &tracing_free_buffer_fops);
6835
6836         trace_create_file("trace_marker", 0220, d_tracer,
6837                           tr, &tracing_mark_fops);
6838
6839         trace_create_file("trace_clock", 0644, d_tracer, tr,
6840                           &trace_clock_fops);
6841
6842         trace_create_file("tracing_on", 0644, d_tracer,
6843                           tr, &rb_simple_fops);
6844
6845         create_trace_options_dir(tr);
6846
6847 #ifdef CONFIG_TRACER_MAX_TRACE
6848         trace_create_file("tracing_max_latency", 0644, d_tracer,
6849                         &tr->max_latency, &tracing_max_lat_fops);
6850 #endif
6851
6852         if (ftrace_create_function_files(tr, d_tracer))
6853                 WARN(1, "Could not allocate function filter files");
6854
6855 #ifdef CONFIG_TRACER_SNAPSHOT
6856         trace_create_file("snapshot", 0644, d_tracer,
6857                           tr, &snapshot_fops);
6858 #endif
6859
6860         for_each_tracing_cpu(cpu)
6861                 tracing_init_tracefs_percpu(tr, cpu);
6862
6863 }
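/*
 * Usage sketch (illustrative only): the control files created above are
 * driven from user space through tracefs, for example:
 *
 *	cat /sys/kernel/tracing/available_tracers
 *	echo function > /sys/kernel/tracing/current_tracer
 *	cat /sys/kernel/tracing/trace_pipe
 *	echo nop > /sys/kernel/tracing/current_tracer
 */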
6864
6865 static struct vfsmount *trace_automount(void *ignore)
6866 {
6867         struct vfsmount *mnt;
6868         struct file_system_type *type;
6869
6870         /*
6871          * To maintain backward compatibility for tools that mount
6872          * debugfs to get to the tracing facility, tracefs is automatically
6873          * mounted to the debugfs/tracing directory.
6874          */
6875         type = get_fs_type("tracefs");
6876         if (!type)
6877                 return NULL;
6878         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6879         put_filesystem(type);
6880         if (IS_ERR(mnt))
6881                 return NULL;
6882         mntget(mnt);
6883
6884         return mnt;
6885 }
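/*
 * Note (an assumption about typical setups, not taken from this file):
 * besides the debugfs/tracing automount set up via tracing_init_dentry()
 * below, tracefs is normally also reachable at /sys/kernel/tracing, or
 * can be mounted manually:
 *
 *	mount -t tracefs nodev /sys/kernel/tracing
 */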
6886
6887 /**
6888  * tracing_init_dentry - initialize top level trace array
6889  *
6890  * This is called when creating files or directories in the tracing
6891  * directory. It is called by boot-up code running via fs_initcall()
6892  * and is expected to return the dentry of the top level tracing directory.
6893  */
6894 struct dentry *tracing_init_dentry(void)
6895 {
6896         struct trace_array *tr = &global_trace;
6897
6898         /* The top level trace array uses NULL as parent */
6899         if (tr->dir)
6900                 return NULL;
6901
6902         if (WARN_ON(!tracefs_initialized()) ||
6903                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6904                  WARN_ON(!debugfs_initialized())))
6905                 return ERR_PTR(-ENODEV);
6906
6907         /*
6908          * As there may still be users that expect the tracing
6909          * files to exist in debugfs/tracing, we must automount
6910          * the tracefs file system there, so older tools still
6911          * work with the newer kernel.
6912          */
6913         tr->dir = debugfs_create_automount("tracing", NULL,
6914                                            trace_automount, NULL);
6915         if (!tr->dir) {
6916                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6917                 return ERR_PTR(-ENOMEM);
6918         }
6919
6920         return NULL;
6921 }
6922
6923 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6924 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6925
6926 static void __init trace_enum_init(void)
6927 {
6928         int len;
6929
6930         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6931         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6932 }
6933
6934 #ifdef CONFIG_MODULES
6935 static void trace_module_add_enums(struct module *mod)
6936 {
6937         if (!mod->num_trace_enums)
6938                 return;
6939
6940         /*
6941          * Modules with bad taint do not have events created, so do
6942          * not bother with enums either.
6943          */
6944         if (trace_module_has_bad_taint(mod))
6945                 return;
6946
6947         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6948 }
6949
6950 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6951 static void trace_module_remove_enums(struct module *mod)
6952 {
6953         union trace_enum_map_item *map;
6954         union trace_enum_map_item **last = &trace_enum_maps;
6955
6956         if (!mod->num_trace_enums)
6957                 return;
6958
6959         mutex_lock(&trace_enum_mutex);
6960
6961         map = trace_enum_maps;
6962
6963         while (map) {
6964                 if (map->head.mod == mod)
6965                         break;
6966                 map = trace_enum_jmp_to_tail(map);
6967                 last = &map->tail.next;
6968                 map = map->tail.next;
6969         }
6970         if (!map)
6971                 goto out;
6972
6973         *last = trace_enum_jmp_to_tail(map)->tail.next;
6974         kfree(map);
6975  out:
6976         mutex_unlock(&trace_enum_mutex);
6977 }
6978 #else
6979 static inline void trace_module_remove_enums(struct module *mod) { }
6980 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6981
6982 static int trace_module_notify(struct notifier_block *self,
6983                                unsigned long val, void *data)
6984 {
6985         struct module *mod = data;
6986
6987         switch (val) {
6988         case MODULE_STATE_COMING:
6989                 trace_module_add_enums(mod);
6990                 break;
6991         case MODULE_STATE_GOING:
6992                 trace_module_remove_enums(mod);
6993                 break;
6994         }
6995
6996         return 0;
6997 }
6998
6999 static struct notifier_block trace_module_nb = {
7000         .notifier_call = trace_module_notify,
7001         .priority = 0,
7002 };
7003 #endif /* CONFIG_MODULES */
7004
7005 static __init int tracer_init_tracefs(void)
7006 {
7007         struct dentry *d_tracer;
7008
7009         trace_access_lock_init();
7010
7011         d_tracer = tracing_init_dentry();
7012         if (IS_ERR(d_tracer))
7013                 return 0;
7014
7015         init_tracer_tracefs(&global_trace, d_tracer);
7016
7017         trace_create_file("tracing_thresh", 0644, d_tracer,
7018                         &global_trace, &tracing_thresh_fops);
7019
7020         trace_create_file("README", 0444, d_tracer,
7021                         NULL, &tracing_readme_fops);
7022
7023         trace_create_file("saved_cmdlines", 0444, d_tracer,
7024                         NULL, &tracing_saved_cmdlines_fops);
7025
7026         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7027                           NULL, &tracing_saved_cmdlines_size_fops);
7028
7029         trace_enum_init();
7030
7031         trace_create_enum_file(d_tracer);
7032
7033 #ifdef CONFIG_MODULES
7034         register_module_notifier(&trace_module_nb);
7035 #endif
7036
7037 #ifdef CONFIG_DYNAMIC_FTRACE
7038         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7039                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7040 #endif
7041
7042         create_trace_instances(d_tracer);
7043
7044         update_tracer_options(&global_trace);
7045
7046         return 0;
7047 }
7048
7049 static int trace_panic_handler(struct notifier_block *this,
7050                                unsigned long event, void *unused)
7051 {
7052         if (ftrace_dump_on_oops)
7053                 ftrace_dump(ftrace_dump_on_oops);
7054         return NOTIFY_OK;
7055 }
7056
7057 static struct notifier_block trace_panic_notifier = {
7058         .notifier_call  = trace_panic_handler,
7059         .next           = NULL,
7060         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7061 };
7062
7063 static int trace_die_handler(struct notifier_block *self,
7064                              unsigned long val,
7065                              void *data)
7066 {
7067         switch (val) {
7068         case DIE_OOPS:
7069                 if (ftrace_dump_on_oops)
7070                         ftrace_dump(ftrace_dump_on_oops);
7071                 break;
7072         default:
7073                 break;
7074         }
7075         return NOTIFY_OK;
7076 }
7077
7078 static struct notifier_block trace_die_notifier = {
7079         .notifier_call = trace_die_handler,
7080         .priority = 200
7081 };
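/*
 * Note: both notifiers above only dump when ftrace_dump_on_oops is set.
 * A usage sketch (hedged, not from this file): it is typically enabled
 * with the "ftrace_dump_on_oops" kernel command line parameter or at run
 * time via the sysctl, where 1 dumps all CPUs and 2 dumps only the CPU
 * that triggered the oops:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */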
7082
7083 /*
7084  * printk is capped at 1024 characters; we really don't need it that big.
7085  * Nothing should be printing 1000 characters anyway.
7086  */
7087 #define TRACE_MAX_PRINT         1000
7088
7089 /*
7090  * Define here KERN_TRACE so that we have one place to modify
7091  * it if we decide to change what log level the ftrace dump
7092  * should be at.
7093  */
7094 #define KERN_TRACE              KERN_EMERG
7095
7096 void
7097 trace_printk_seq(struct trace_seq *s)
7098 {
7099         /* Probably should print a warning here. */
7100         if (s->seq.len >= TRACE_MAX_PRINT)
7101                 s->seq.len = TRACE_MAX_PRINT;
7102
7103         /*
7104          * More paranoid code. Although the buffer size is set to
7105          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7106          * an extra layer of protection.
7107          */
7108         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7109                 s->seq.len = s->seq.size - 1;
7110
7111         /* should already be NUL terminated, but we are paranoid. */
7112         s->buffer[s->seq.len] = 0;
7113
7114         printk(KERN_TRACE "%s", s->buffer);
7115
7116         trace_seq_init(s);
7117 }
7118
7119 void trace_init_global_iter(struct trace_iterator *iter)
7120 {
7121         iter->tr = &global_trace;
7122         iter->trace = iter->tr->current_trace;
7123         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7124         iter->trace_buffer = &global_trace.trace_buffer;
7125
7126         if (iter->trace && iter->trace->open)
7127                 iter->trace->open(iter);
7128
7129         /* Annotate start of buffers if we had overruns */
7130         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7131                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7132
7133         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7134         if (trace_clocks[iter->tr->clock_id].in_ns)
7135                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7136 }
7137
7138 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7139 {
7140         /* use static because iter can be a bit big for the stack */
7141         static struct trace_iterator iter;
7142         static atomic_t dump_running;
7143         struct trace_array *tr = &global_trace;
7144         unsigned int old_userobj;
7145         unsigned long flags;
7146         int cnt = 0, cpu;
7147
7148         /* Only allow one dump user at a time. */
7149         if (atomic_inc_return(&dump_running) != 1) {
7150                 atomic_dec(&dump_running);
7151                 return;
7152         }
7153
7154         /*
7155          * Always turn off tracing when we dump.
7156          * We don't need to show trace output of what happens
7157          * between multiple crashes.
7158          *
7159          * If the user does a sysrq-z, then they can re-enable
7160          * tracing with echo 1 > tracing_on.
7161          */
7162         tracing_off();
7163
7164         local_irq_save(flags);
7165
7166         /* Simulate the iterator */
7167         trace_init_global_iter(&iter);
7168
7169         for_each_tracing_cpu(cpu) {
7170                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7171         }
7172
7173         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7174
7175         /* don't look at user memory in panic mode */
7176         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7177
7178         switch (oops_dump_mode) {
7179         case DUMP_ALL:
7180                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7181                 break;
7182         case DUMP_ORIG:
7183                 iter.cpu_file = raw_smp_processor_id();
7184                 break;
7185         case DUMP_NONE:
7186                 goto out_enable;
7187         default:
7188                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7189                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7190         }
7191
7192         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7193
7194         /* Did function tracer already get disabled? */
7195         if (ftrace_is_dead()) {
7196                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7197                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7198         }
7199
7200         /*
7201          * We need to stop all tracing on all CPUs to read
7202          * the next buffer. This is a bit expensive, but is
7203          * not done often. We fill all that we can read,
7204          * and then release the locks again.
7205          */
7206
7207         while (!trace_empty(&iter)) {
7208
7209                 if (!cnt)
7210                         printk(KERN_TRACE "---------------------------------\n");
7211
7212                 cnt++;
7213
7214                 trace_iterator_reset(&iter);
7215                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7216
7217                 if (trace_find_next_entry_inc(&iter) != NULL) {
7218                         int ret;
7219
7220                         ret = print_trace_line(&iter);
7221                         if (ret != TRACE_TYPE_NO_CONSUME)
7222                                 trace_consume(&iter);
7223                 }
7224                 touch_nmi_watchdog();
7225
7226                 trace_printk_seq(&iter.seq);
7227         }
7228
7229         if (!cnt)
7230                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7231         else
7232                 printk(KERN_TRACE "---------------------------------\n");
7233
7234  out_enable:
7235         tr->trace_flags |= old_userobj;
7236
7237         for_each_tracing_cpu(cpu) {
7238                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7239         }
7240         atomic_dec(&dump_running);
7241         local_irq_restore(flags);
7242 }
7243 EXPORT_SYMBOL_GPL(ftrace_dump);
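/*
 * Example (illustrative only; the condition is hypothetical): since
 * ftrace_dump() is exported, other kernel code can dump the ring buffers
 * to the console as a last-resort debugging aid:
 *
 *	if (WARN_ON(hit_unexpected_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG would restrict the dump to the CPU the call runs on.
 */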
7244
7245 __init static int tracer_alloc_buffers(void)
7246 {
7247         int ring_buf_size;
7248         int ret = -ENOMEM;
7249
7250         /*
7251          * Make sure we don't accidentally add more trace options
7252          * than we have bits for.
7253          */
7254         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7255
7256         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7257                 goto out;
7258
7259         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7260                 goto out_free_buffer_mask;
7261
7262         /* Only allocate trace_printk buffers if a trace_printk exists */
7263         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
7264                 /* Must be called before global_trace.buffer is allocated */
7265                 trace_printk_init_buffers();
7266
7267         /* To save memory, keep the ring buffer size to its minimum */
7268         if (ring_buffer_expanded)
7269                 ring_buf_size = trace_buf_size;
7270         else
7271                 ring_buf_size = 1;
7272
7273         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7274         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7275
7276         raw_spin_lock_init(&global_trace.start_lock);
7277
7278         /* Used for event triggers */
7279         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7280         if (!temp_buffer)
7281                 goto out_free_cpumask;
7282
7283         if (trace_create_savedcmd() < 0)
7284                 goto out_free_temp_buffer;
7285
7286         /* TODO: make the number of buffers hot pluggable with CPUs */
7287         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7288                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7289                 WARN_ON(1);
7290                 goto out_free_savedcmd;
7291         }
7292
7293         if (global_trace.buffer_disabled)
7294                 tracing_off();
7295
7296         if (trace_boot_clock) {
7297                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7298                 if (ret < 0)
7299                         pr_warning("Trace clock %s not defined, going back to default\n",
7300                                    trace_boot_clock);
7301         }
7302
7303         /*
7304          * register_tracer() might reference current_trace, so it
7305          * needs to be set before we register anything. This is
7306          * just a bootstrap of current_trace anyway.
7307          */
7308         global_trace.current_trace = &nop_trace;
7309
7310         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7311
7312         ftrace_init_global_array_ops(&global_trace);
7313
7314         init_trace_flags_index(&global_trace);
7315
7316         register_tracer(&nop_trace);
7317
7318         /* All seems OK, enable tracing */
7319         tracing_disabled = 0;
7320
7321         atomic_notifier_chain_register(&panic_notifier_list,
7322                                        &trace_panic_notifier);
7323
7324         register_die_notifier(&trace_die_notifier);
7325
7326         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7327
7328         INIT_LIST_HEAD(&global_trace.systems);
7329         INIT_LIST_HEAD(&global_trace.events);
7330         list_add(&global_trace.list, &ftrace_trace_arrays);
7331
7332         apply_trace_boot_options();
7333
7334         register_snapshot_cmd();
7335
7336         return 0;
7337
7338 out_free_savedcmd:
7339         free_saved_cmdlines_buffer(savedcmd);
7340 out_free_temp_buffer:
7341         ring_buffer_free(temp_buffer);
7342 out_free_cpumask:
7343         free_cpumask_var(global_trace.tracing_cpumask);
7344 out_free_buffer_mask:
7345         free_cpumask_var(tracing_buffer_mask);
7346 out:
7347         return ret;
7348 }
7349
7350 void __init trace_init(void)
7351 {
7352         if (tracepoint_printk) {
7353                 tracepoint_print_iter =
7354                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7355                 if (WARN_ON(!tracepoint_print_iter))
7356                         tracepoint_printk = 0;
7357         }
7358         tracer_alloc_buffers();
7359         trace_event_init();
7360 }
7361
7362 __init static int clear_boot_tracer(void)
7363 {
7364         /*
7365          * The default bootup tracer name is stored in an init-section buffer.
7366          * This function is called at late_initcall time. If we did not
7367          * find the boot tracer, then clear it out, to prevent
7368          * later registration from accessing the buffer that is
7369          * about to be freed.
7370          */
7371         if (!default_bootup_tracer)
7372                 return 0;
7373
7374         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7375                default_bootup_tracer);
7376         default_bootup_tracer = NULL;
7377
7378         return 0;
7379 }
7380
7381 fs_initcall(tracer_init_tracefs);
7382 late_initcall(clear_boot_tracer);