GNU Linux-libre 4.19.242-gnu1
kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring-buffer, such as trace_printk, could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
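/*
 * Illustrative sketch (not part of the original file): one way to walk the
 * saved arrays laid out as described above. It assumes the head element sits
 * at index 0, the map entries at indexes 1 .. length, and the tail element at
 * index length + 1, linking to the next saved array (or NULL).
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long i, len = ptr->head.length;
 *
 *		for (i = 0; i < len; i++)
 *			pr_info("%s: %s = %lu\n", ptr[i + 1].map.system,
 *				ptr[i + 1].map.eval_string,
 *				ptr[i + 1].map.eval_value);
 *
 *		ptr = ptr[len + 1].tail.next;
 *	}
 */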
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         /* Ignore the "tp_printk_stop_on_boot" param */
236         if (*str == '_')
237                 return 0;
238
239         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
240                 tracepoint_printk = 1;
241         return 1;
242 }
243 __setup("tp_printk", set_tracepoint_printk);
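/*
 * Illustrative note (not part of the original file): the boot parameters
 * handled above can be combined on the kernel command line, e.g.
 *
 *	ftrace=function_graph trace_clock=global trace_options=sym-addr
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning alloc_snapshot
 *
 * ("trace_buf_size=", handled further down in this file, is often added
 * as well.)
 */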
244
245 unsigned long long ns2usecs(u64 nsec)
246 {
247         nsec += 500;
248         do_div(nsec, 1000);
249         return nsec;
250 }
251
252 /* trace_flags holds trace_options default values */
253 #define TRACE_DEFAULT_FLAGS                                             \
254         (FUNCTION_DEFAULT_FLAGS |                                       \
255          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
256          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
257          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
258          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
259
260 /* trace_options that are only supported by global_trace */
261 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
262                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
263
264 /* trace_flags that are default zero for instances */
265 #define ZEROED_TRACE_FLAGS \
266         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
267
268 /*
269  * The global_trace is the descriptor that holds the top-level tracing
270  * buffers for the live tracing.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * When adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork, and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid already is +1 of the actual previous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
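/*
 * Illustrative sketch (not part of the original file): the trace_pid_*
 * helpers above are meant to back a seq_file interface. A hypothetical user
 * could wire them up like this (all "example_" names are made up, and
 * m->private is assumed to hold the struct trace_pid_list):
 *
 *	static void *example_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(m->private, pos);
 *	}
 *
 *	static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void example_pid_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,
 *		.next	= example_pid_next,
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */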
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always create a new array. The write is an all-or-nothing
498          * operation: a new array is built whenever the user adds new
499          * pids, and if the operation fails, the current list is
500          * not modified.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list) {
504                 trace_parser_put(&parser);
505                 return -ENOMEM;
506         }
507
508         pid_list->pid_max = READ_ONCE(pid_max);
509
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 trace_parser_put(&parser);
517                 kfree(pid_list);
518                 return -ENOMEM;
519         }
520
521         if (filtered_pids) {
522                 /* copy the current bits to the new max */
523                 for_each_set_bit(pid, filtered_pids->pids,
524                                  filtered_pids->pid_max) {
525                         set_bit(pid, pid_list->pids);
526                         nr_pids++;
527                 }
528         }
529
530         while (cnt > 0) {
531
532                 pos = 0;
533
534                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
535                 if (ret < 0 || !trace_parser_loaded(&parser))
536                         break;
537
538                 read += ret;
539                 ubuf += ret;
540                 cnt -= ret;
541
542                 ret = -EINVAL;
543                 if (kstrtoul(parser.buffer, 0, &val))
544                         break;
545                 if (val >= pid_list->pid_max)
546                         break;
547
548                 pid = (pid_t)val;
549
550                 set_bit(pid, pid_list->pids);
551                 nr_pids++;
552
553                 trace_parser_clear(&parser);
554                 ret = 0;
555         }
556         trace_parser_put(&parser);
557
558         if (ret < 0) {
559                 trace_free_pid_list(pid_list);
560                 return ret;
561         }
562
563         if (!nr_pids) {
564                 /* Cleared the list of pids */
565                 trace_free_pid_list(pid_list);
566                 read = ret;
567                 pid_list = NULL;
568         }
569
570         *new_pid_list = pid_list;
571
572         return read;
573 }
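/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * trace_pid_write() builds the new list and then publishes it with RCU.
 * "tr->example_pids" and "example_mutex" below are hypothetical:
 *
 *	filtered_pids = rcu_dereference_protected(tr->example_pids,
 *					lockdep_is_held(&example_mutex));
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->example_pids, pid_list);
 *
 *	if (filtered_pids) {
 *		synchronize_sched();
 *		trace_free_pid_list(filtered_pids);
 *	}
 */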
574
575 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
576 {
577         u64 ts;
578
579         /* Early boot up does not have a buffer yet */
580         if (!buf->buffer)
581                 return trace_clock_local();
582
583         ts = ring_buffer_time_stamp(buf->buffer, cpu);
584         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
585
586         return ts;
587 }
588
589 u64 ftrace_now(int cpu)
590 {
591         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
592 }
593
594 /**
595  * tracing_is_enabled - Show if global_trace has been disabled
596  *
597  * Shows if the global trace has been enabled or not. It uses the
598  * mirror flag "buffer_disabled", which is meant for fast paths such as
599  * the irqsoff tracer. But it may be inaccurate due to races. If you
600  * need to know the accurate state, use tracing_is_on() which is a little
601  * slower, but accurate.
602  */
603 int tracing_is_enabled(void)
604 {
605         /*
606          * For quick access (irqsoff uses this in fast path), just
607          * return the mirror variable of the state of the ring buffer.
608          * It's a little racy, but we don't really care.
609          */
610         smp_rmb();
611         return !global_trace.buffer_disabled;
612 }
613
614 /*
615  * trace_buf_size is the size in bytes that is allocated
616  * for a buffer. Note, the number of bytes is always rounded
617  * to page size.
618  *
619  * This number is purposely set to a low number of 16384.
620  * If a dump on oops happens, not having to wait for all that
621  * output is much appreciated. In any case, this is configurable
622  * at both boot time and run time.
623  */
624 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
625
626 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
627
628 /* trace_types holds a link list of available tracers. */
629 static struct tracer            *trace_types __read_mostly;
630
631 /*
632  * trace_types_lock is used to protect the trace_types list.
633  */
634 DEFINE_MUTEX(trace_types_lock);
635
636 /*
637  * Serialize access to the ring buffer.
638  *
639  * The ring buffer serializes readers, but that is only low-level protection.
640  * The validity of the events (returned by ring_buffer_peek() etc.)
641  * is not protected by the ring buffer.
642  *
643  * The content of events may become garbage if we allow other processes to
644  * consume these events concurrently:
645  *   A) the page of the consumed events may become a normal page
646  *      (not a reader page) in the ring buffer, and this page will be
647  *      rewritten by the event producer.
648  *   B) the page of the consumed events may become a page for splice_read,
649  *      and this page will be returned to the system.
650  *
651  * These primitives allow multiple processes to access different per-CPU
652  * ring buffers concurrently.
653  *
654  * These primitives don't distinguish read-only and read-consume access.
655  * Multiple read-only accesses are also serialized.
656  */
657
658 #ifdef CONFIG_SMP
659 static DECLARE_RWSEM(all_cpu_access_lock);
660 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
661
662 static inline void trace_access_lock(int cpu)
663 {
664         if (cpu == RING_BUFFER_ALL_CPUS) {
665                 /* gain it for accessing the whole ring buffer. */
666                 down_write(&all_cpu_access_lock);
667         } else {
668                 /* gain it for accessing a cpu ring buffer. */
669
670                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
671                 down_read(&all_cpu_access_lock);
672
673                 /* Secondly block other access to this @cpu ring buffer. */
674                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
675         }
676 }
677
678 static inline void trace_access_unlock(int cpu)
679 {
680         if (cpu == RING_BUFFER_ALL_CPUS) {
681                 up_write(&all_cpu_access_lock);
682         } else {
683                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
684                 up_read(&all_cpu_access_lock);
685         }
686 }
687
688 static inline void trace_access_lock_init(void)
689 {
690         int cpu;
691
692         for_each_possible_cpu(cpu)
693                 mutex_init(&per_cpu(cpu_access_lock, cpu));
694 }
695
696 #else
697
698 static DEFINE_MUTEX(access_lock);
699
700 static inline void trace_access_lock(int cpu)
701 {
702         (void)cpu;
703         mutex_lock(&access_lock);
704 }
705
706 static inline void trace_access_unlock(int cpu)
707 {
708         (void)cpu;
709         mutex_unlock(&access_lock);
710 }
711
712 static inline void trace_access_lock_init(void)
713 {
714 }
715
716 #endif
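/*
 * Illustrative sketch (not part of the original file): a consuming reader
 * for a single CPU brackets its ring-buffer accesses with the primitives
 * above ("buffer", "cpu", "ts" and "lost" are assumed to be set up by the
 * caller):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost);
 *	... use the event while the lock is held ...
 *	trace_access_unlock(cpu);
 */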
717
718 #ifdef CONFIG_STACKTRACE
719 static void __ftrace_trace_stack(struct ring_buffer *buffer,
720                                  unsigned long flags,
721                                  int skip, int pc, struct pt_regs *regs);
722 static inline void ftrace_trace_stack(struct trace_array *tr,
723                                       struct ring_buffer *buffer,
724                                       unsigned long flags,
725                                       int skip, int pc, struct pt_regs *regs);
726
727 #else
728 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
729                                         unsigned long flags,
730                                         int skip, int pc, struct pt_regs *regs)
731 {
732 }
733 static inline void ftrace_trace_stack(struct trace_array *tr,
734                                       struct ring_buffer *buffer,
735                                       unsigned long flags,
736                                       int skip, int pc, struct pt_regs *regs)
737 {
738 }
739
740 #endif
741
742 static __always_inline void
743 trace_event_setup(struct ring_buffer_event *event,
744                   int type, unsigned long flags, int pc)
745 {
746         struct trace_entry *ent = ring_buffer_event_data(event);
747
748         tracing_generic_entry_update(ent, flags, pc);
749         ent->type = type;
750 }
751
752 static __always_inline struct ring_buffer_event *
753 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
754                           int type,
755                           unsigned long len,
756                           unsigned long flags, int pc)
757 {
758         struct ring_buffer_event *event;
759
760         event = ring_buffer_lock_reserve(buffer, len);
761         if (event != NULL)
762                 trace_event_setup(event, type, flags, pc);
763
764         return event;
765 }
766
767 void tracer_tracing_on(struct trace_array *tr)
768 {
769         if (tr->trace_buffer.buffer)
770                 ring_buffer_record_on(tr->trace_buffer.buffer);
771         /*
772          * This flag is looked at when buffers haven't been allocated
773          * yet, or by some tracers (like irqsoff), that just want to
774          * know if the ring buffer has been disabled, but it can handle
775          * races where it gets disabled while we still do a record.
776          * As the check is in the fast path of the tracers, it is more
777          * important to be fast than accurate.
778          */
779         tr->buffer_disabled = 0;
780         /* Make the flag seen by readers */
781         smp_wmb();
782 }
783
784 /**
785  * tracing_on - enable tracing buffers
786  *
787  * This function enables tracing buffers that may have been
788  * disabled with tracing_off.
789  */
790 void tracing_on(void)
791 {
792         tracer_tracing_on(&global_trace);
793 }
794 EXPORT_SYMBOL_GPL(tracing_on);
795
796
797 static __always_inline void
798 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
799 {
800         __this_cpu_write(trace_taskinfo_save, true);
801
802         /* If this is the temp buffer, we need to commit fully */
803         if (this_cpu_read(trace_buffered_event) == event) {
804                 /* Length is in event->array[0] */
805                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
806                 /* Release the temp buffer */
807                 this_cpu_dec(trace_buffered_event_cnt);
808         } else
809                 ring_buffer_unlock_commit(buffer, event);
810 }
811
812 /**
813  * __trace_puts - write a constant string into the trace buffer.
814  * @ip:    The address of the caller
815  * @str:   The constant string to write
816  * @size:  The size of the string.
817  */
818 int __trace_puts(unsigned long ip, const char *str, int size)
819 {
820         struct ring_buffer_event *event;
821         struct ring_buffer *buffer;
822         struct print_entry *entry;
823         unsigned long irq_flags;
824         int alloc;
825         int pc;
826
827         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
828                 return 0;
829
830         pc = preempt_count();
831
832         if (unlikely(tracing_selftest_running || tracing_disabled))
833                 return 0;
834
835         alloc = sizeof(*entry) + size + 2; /* possible \n added */
836
837         local_save_flags(irq_flags);
838         buffer = global_trace.trace_buffer.buffer;
839         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
840                                             irq_flags, pc);
841         if (!event)
842                 return 0;
843
844         entry = ring_buffer_event_data(event);
845         entry->ip = ip;
846
847         memcpy(&entry->buf, str, size);
848
849         /* Add a newline if necessary */
850         if (entry->buf[size - 1] != '\n') {
851                 entry->buf[size] = '\n';
852                 entry->buf[size + 1] = '\0';
853         } else
854                 entry->buf[size] = '\0';
855
856         __buffer_unlock_commit(buffer, event);
857         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
858
859         return size;
860 }
861 EXPORT_SYMBOL_GPL(__trace_puts);
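/*
 * Illustrative note (not part of the original file): callers normally do not
 * use __trace_puts() directly but go through the trace_puts() macro from
 * <linux/kernel.h>, which supplies @ip and @size automatically:
 *
 *	trace_puts("reached the fast path\n");
 */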
862
863 /**
864  * __trace_bputs - write the pointer to a constant string into trace buffer
865  * @ip:    The address of the caller
866  * @str:   The constant string to write to the buffer
867  */
868 int __trace_bputs(unsigned long ip, const char *str)
869 {
870         struct ring_buffer_event *event;
871         struct ring_buffer *buffer;
872         struct bputs_entry *entry;
873         unsigned long irq_flags;
874         int size = sizeof(struct bputs_entry);
875         int pc;
876
877         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
878                 return 0;
879
880         pc = preempt_count();
881
882         if (unlikely(tracing_selftest_running || tracing_disabled))
883                 return 0;
884
885         local_save_flags(irq_flags);
886         buffer = global_trace.trace_buffer.buffer;
887         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
888                                             irq_flags, pc);
889         if (!event)
890                 return 0;
891
892         entry = ring_buffer_event_data(event);
893         entry->ip                       = ip;
894         entry->str                      = str;
895
896         __buffer_unlock_commit(buffer, event);
897         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898
899         return 1;
900 }
901 EXPORT_SYMBOL_GPL(__trace_bputs);
902
903 #ifdef CONFIG_TRACER_SNAPSHOT
904 void tracing_snapshot_instance(struct trace_array *tr)
905 {
906         struct tracer *tracer = tr->current_trace;
907         unsigned long flags;
908
909         if (in_nmi()) {
910                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
911                 internal_trace_puts("*** snapshot is being ignored        ***\n");
912                 return;
913         }
914
915         if (!tr->allocated_snapshot) {
916                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
917                 internal_trace_puts("*** stopping trace here!   ***\n");
918                 tracing_off();
919                 return;
920         }
921
922         /* Note, snapshot can not be used when the tracer uses it */
923         if (tracer->use_max_tr) {
924                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
925                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
926                 return;
927         }
928
929         local_irq_save(flags);
930         update_max_tr(tr, current, smp_processor_id());
931         local_irq_restore(flags);
932 }
933
934 /**
935  * tracing_snapshot - take a snapshot of the current buffer.
936  *
937  * This causes a swap between the snapshot buffer and the current live
938  * tracing buffer. You can use this to take snapshots of the live
939  * trace when some condition is triggered, but continue to trace.
940  *
941  * Note, make sure to allocate the snapshot with either
942  * a tracing_snapshot_alloc(), or by doing it manually
943  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
944  *
945  * If the snapshot buffer is not allocated, it will stop tracing.
946  * Basically making a permanent snapshot.
947  */
948 void tracing_snapshot(void)
949 {
950         struct trace_array *tr = &global_trace;
951
952         tracing_snapshot_instance(tr);
953 }
954 EXPORT_SYMBOL_GPL(tracing_snapshot);
955
956 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
957                                         struct trace_buffer *size_buf, int cpu_id);
958 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
959
960 int tracing_alloc_snapshot_instance(struct trace_array *tr)
961 {
962         int ret;
963
964         if (!tr->allocated_snapshot) {
965
966                 /* allocate spare buffer */
967                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
968                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
969                 if (ret < 0)
970                         return ret;
971
972                 tr->allocated_snapshot = true;
973         }
974
975         return 0;
976 }
977
978 static void free_snapshot(struct trace_array *tr)
979 {
980         /*
981          * We don't free the ring buffer; instead, we resize it because
982          * the max_tr ring buffer has some state (e.g. ring->clock) and
983          * we want to preserve it.
984          */
985         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
986         set_buffer_entries(&tr->max_buffer, 1);
987         tracing_reset_online_cpus(&tr->max_buffer);
988         tr->allocated_snapshot = false;
989 }
990
991 /**
992  * tracing_alloc_snapshot - allocate snapshot buffer.
993  *
994  * This only allocates the snapshot buffer if it isn't already
995  * allocated - it doesn't also take a snapshot.
996  *
997  * This is meant to be used in cases where the snapshot buffer needs
998  * to be set up for events that can't sleep but need to be able to
999  * trigger a snapshot.
1000  */
1001 int tracing_alloc_snapshot(void)
1002 {
1003         struct trace_array *tr = &global_trace;
1004         int ret;
1005
1006         ret = tracing_alloc_snapshot_instance(tr);
1007         WARN_ON(ret < 0);
1008
1009         return ret;
1010 }
1011 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1012
1013 /**
1014  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1015  *
1016  * This is similar to tracing_snapshot(), but it will allocate the
1017  * snapshot buffer if it isn't already allocated. Use this only
1018  * where it is safe to sleep, as the allocation may sleep.
1019  *
1020  * This causes a swap between the snapshot buffer and the current live
1021  * tracing buffer. You can use this to take snapshots of the live
1022  * trace when some condition is triggered, but continue to trace.
1023  */
1024 void tracing_snapshot_alloc(void)
1025 {
1026         int ret;
1027
1028         ret = tracing_alloc_snapshot();
1029         if (ret < 0)
1030                 return;
1031
1032         tracing_snapshot();
1033 }
1034 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
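/*
 * Illustrative sketch (not part of the original file): kernel code that wants
 * to freeze the trace around a rare condition can allocate the snapshot
 * buffer once from sleepable context and trigger snapshots from the hot path
 * (the condition below is hypothetical):
 *
 *	At init time (from a context that may sleep):
 *		tracing_alloc_snapshot();
 *
 *	In the hot path:
 *		if (unlikely(saw_rare_condition))
 *			tracing_snapshot();
 */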
1035 #else
1036 void tracing_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_snapshot);
1041 int tracing_alloc_snapshot(void)
1042 {
1043         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1044         return -ENODEV;
1045 }
1046 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1047 void tracing_snapshot_alloc(void)
1048 {
1049         /* Give warning */
1050         tracing_snapshot();
1051 }
1052 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1053 #endif /* CONFIG_TRACER_SNAPSHOT */
1054
1055 void tracer_tracing_off(struct trace_array *tr)
1056 {
1057         if (tr->trace_buffer.buffer)
1058                 ring_buffer_record_off(tr->trace_buffer.buffer);
1059         /*
1060          * This flag is looked at when buffers haven't been allocated
1061          * yet, or by some tracers (like irqsoff), that just want to
1062          * know if the ring buffer has been disabled, but it can handle
1063          * races where it gets disabled while we still do a record.
1064          * As the check is in the fast path of the tracers, it is more
1065          * important to be fast than accurate.
1066          */
1067         tr->buffer_disabled = 1;
1068         /* Make the flag seen by readers */
1069         smp_wmb();
1070 }
1071
1072 /**
1073  * tracing_off - turn off tracing buffers
1074  *
1075  * This function stops the tracing buffers from recording data.
1076  * It does not disable any overhead the tracers themselves may
1077  * be causing. This function simply causes all recording to
1078  * the ring buffers to fail.
1079  */
1080 void tracing_off(void)
1081 {
1082         tracer_tracing_off(&global_trace);
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_off);
1085
1086 void disable_trace_on_warning(void)
1087 {
1088         if (__disable_trace_on_warning)
1089                 tracing_off();
1090 }
1091
1092 /**
1093  * tracer_tracing_is_on - show real state of ring buffer enabled
1094  * @tr: the trace array to check if its ring buffer is enabled
1095  *
1096  * Shows real state of the ring buffer if it is enabled or not.
1097  */
1098 bool tracer_tracing_is_on(struct trace_array *tr)
1099 {
1100         if (tr->trace_buffer.buffer)
1101                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1102         return !tr->buffer_disabled;
1103 }
1104
1105 /**
1106  * tracing_is_on - show state of ring buffers enabled
1107  */
1108 int tracing_is_on(void)
1109 {
1110         return tracer_tracing_is_on(&global_trace);
1111 }
1112 EXPORT_SYMBOL_GPL(tracing_is_on);
1113
1114 static int __init set_buf_size(char *str)
1115 {
1116         unsigned long buf_size;
1117
1118         if (!str)
1119                 return 0;
1120         buf_size = memparse(str, &str);
1121         /*
1122          * nr_entries can not be zero and the startup
1123          * tests require some buffer space. Therefore
1124          * ensure we have at least 4096 bytes of buffer.
1125          */
1126         trace_buf_size = max(4096UL, buf_size);
1127         return 1;
1128 }
1129 __setup("trace_buf_size=", set_buf_size);
1130
1131 static int __init set_tracing_thresh(char *str)
1132 {
1133         unsigned long threshold;
1134         int ret;
1135
1136         if (!str)
1137                 return 0;
1138         ret = kstrtoul(str, 0, &threshold);
1139         if (ret < 0)
1140                 return 0;
1141         tracing_thresh = threshold * 1000;
1142         return 1;
1143 }
1144 __setup("tracing_thresh=", set_tracing_thresh);
1145
1146 unsigned long nsecs_to_usecs(unsigned long nsecs)
1147 {
1148         return nsecs / 1000;
1149 }
1150
1151 /*
1152  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1153  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1154  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1155  * of strings in the order that the evals (enum) were defined.
1156  */
1157 #undef C
1158 #define C(a, b) b
1159
1160 /* These must match the bit positions in trace_iterator_flags */
1161 static const char *trace_options[] = {
1162         TRACE_FLAGS
1163         NULL
1164 };
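/*
 * Illustrative note (not part of the original file): the companion
 * definitions in trace.h expand the same TRACE_FLAGS list into bit numbers
 * and masks by redefining C(), roughly like:
 *
 *	#undef C
 *	#define C(a, b) TRACE_ITER_##a##_BIT
 *
 * which is what keeps the strings above in sync with the bit positions.
 */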
1165
1166 static struct {
1167         u64 (*func)(void);
1168         const char *name;
1169         int in_ns;              /* is this clock in nanoseconds? */
1170 } trace_clocks[] = {
1171         { trace_clock_local,            "local",        1 },
1172         { trace_clock_global,           "global",       1 },
1173         { trace_clock_counter,          "counter",      0 },
1174         { trace_clock_jiffies,          "uptime",       0 },
1175         { trace_clock,                  "perf",         1 },
1176         { ktime_get_mono_fast_ns,       "mono",         1 },
1177         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1178         { ktime_get_boot_fast_ns,       "boot",         1 },
1179         ARCH_TRACE_CLOCKS
1180 };
1181
1182 bool trace_clock_in_ns(struct trace_array *tr)
1183 {
1184         if (trace_clocks[tr->clock_id].in_ns)
1185                 return true;
1186
1187         return false;
1188 }
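/*
 * Illustrative note (not part of the original file): besides the
 * "trace_clock=" boot parameter handled earlier, the active clock can be
 * switched at run time through tracefs, e.g.
 *
 *	echo mono > /sys/kernel/debug/tracing/trace_clock
 */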
1189
1190 /*
1191  * trace_parser_get_init - gets the buffer for trace parser
1192  */
1193 int trace_parser_get_init(struct trace_parser *parser, int size)
1194 {
1195         memset(parser, 0, sizeof(*parser));
1196
1197         parser->buffer = kmalloc(size, GFP_KERNEL);
1198         if (!parser->buffer)
1199                 return 1;
1200
1201         parser->size = size;
1202         return 0;
1203 }
1204
1205 /*
1206  * trace_parser_put - frees the buffer for trace parser
1207  */
1208 void trace_parser_put(struct trace_parser *parser)
1209 {
1210         kfree(parser->buffer);
1211         parser->buffer = NULL;
1212 }
1213
1214 /*
1215  * trace_get_user - reads the user input string separated by space
1216  * (matched by isspace(ch))
1217  *
1218  * For each string found the 'struct trace_parser' is updated,
1219  * and the function returns.
1220  *
1221  * Returns number of bytes read.
1222  *
1223  * See kernel/trace/trace.h for 'struct trace_parser' details.
1224  */
1225 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1226         size_t cnt, loff_t *ppos)
1227 {
1228         char ch;
1229         size_t read = 0;
1230         ssize_t ret;
1231
1232         if (!*ppos)
1233                 trace_parser_clear(parser);
1234
1235         ret = get_user(ch, ubuf++);
1236         if (ret)
1237                 goto out;
1238
1239         read++;
1240         cnt--;
1241
1242         /*
1243          * The parser is not finished with the last write,
1244          * continue reading the user input without skipping spaces.
1245          */
1246         if (!parser->cont) {
1247                 /* skip white space */
1248                 while (cnt && isspace(ch)) {
1249                         ret = get_user(ch, ubuf++);
1250                         if (ret)
1251                                 goto out;
1252                         read++;
1253                         cnt--;
1254                 }
1255
1256                 parser->idx = 0;
1257
1258                 /* only spaces were written */
1259                 if (isspace(ch) || !ch) {
1260                         *ppos += read;
1261                         ret = read;
1262                         goto out;
1263                 }
1264         }
1265
1266         /* read the non-space input */
1267         while (cnt && !isspace(ch) && ch) {
1268                 if (parser->idx < parser->size - 1)
1269                         parser->buffer[parser->idx++] = ch;
1270                 else {
1271                         ret = -EINVAL;
1272                         goto out;
1273                 }
1274                 ret = get_user(ch, ubuf++);
1275                 if (ret)
1276                         goto out;
1277                 read++;
1278                 cnt--;
1279         }
1280
1281         /* We either got finished input or we have to wait for another call. */
1282         if (isspace(ch) || !ch) {
1283                 parser->buffer[parser->idx] = 0;
1284                 parser->cont = false;
1285         } else if (parser->idx < parser->size - 1) {
1286                 parser->cont = true;
1287                 parser->buffer[parser->idx++] = ch;
1288                 /* Make sure the parsed string always terminates with '\0'. */
1289                 parser->buffer[parser->idx] = 0;
1290         } else {
1291                 ret = -EINVAL;
1292                 goto out;
1293         }
1294
1295         *ppos += read;
1296         ret = read;
1297
1298 out:
1299         return ret;
1300 }
1301
1302 /* TODO add a seq_buf_to_buffer() */
1303 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1304 {
1305         int len;
1306
1307         if (trace_seq_used(s) <= s->seq.readpos)
1308                 return -EBUSY;
1309
1310         len = trace_seq_used(s) - s->seq.readpos;
1311         if (cnt > len)
1312                 cnt = len;
1313         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1314
1315         s->seq.readpos += cnt;
1316         return cnt;
1317 }
1318
1319 unsigned long __read_mostly     tracing_thresh;
1320
1321 #ifdef CONFIG_TRACER_MAX_TRACE
1322 /*
1323  * Copy the new maximum trace into the separate maximum-trace
1324  * structure. (this way the maximum trace is permanently saved,
1325  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1326  */
1327 static void
1328 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1329 {
1330         struct trace_buffer *trace_buf = &tr->trace_buffer;
1331         struct trace_buffer *max_buf = &tr->max_buffer;
1332         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1333         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1334
1335         max_buf->cpu = cpu;
1336         max_buf->time_start = data->preempt_timestamp;
1337
1338         max_data->saved_latency = tr->max_latency;
1339         max_data->critical_start = data->critical_start;
1340         max_data->critical_end = data->critical_end;
1341
1342         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1343         max_data->pid = tsk->pid;
1344         /*
1345          * If tsk == current, then use current_uid(), as that does not use
1346          * RCU. The irq tracer can be called out of RCU scope.
1347          */
1348         if (tsk == current)
1349                 max_data->uid = current_uid();
1350         else
1351                 max_data->uid = task_uid(tsk);
1352
1353         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1354         max_data->policy = tsk->policy;
1355         max_data->rt_priority = tsk->rt_priority;
1356
1357         /* record this task's comm */
1358         tracing_record_cmdline(tsk);
1359 }
1360
1361 /**
1362  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1363  * @tr: tracer
1364  * @tsk: the task with the latency
1365  * @cpu: The cpu that initiated the trace.
1366  *
1367  * Flip the buffers between the @tr and the max_tr and record information
1368  * about which task was the cause of this latency.
1369  */
1370 void
1371 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1372 {
1373         if (tr->stop_count)
1374                 return;
1375
1376         WARN_ON_ONCE(!irqs_disabled());
1377
1378         if (!tr->allocated_snapshot) {
1379                 /* Only the nop tracer should hit this when disabling */
1380                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1381                 return;
1382         }
1383
1384         arch_spin_lock(&tr->max_lock);
1385
1386         /* Inherit the recordable setting from trace_buffer */
1387         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1388                 ring_buffer_record_on(tr->max_buffer.buffer);
1389         else
1390                 ring_buffer_record_off(tr->max_buffer.buffer);
1391
1392         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1393
1394         __update_max_tr(tr, tsk, cpu);
1395         arch_spin_unlock(&tr->max_lock);
1396 }
1397
1398 /**
1399  * update_max_tr_single - only copy one trace over, and reset the rest
1400  * @tr: tracer
1401  * @tsk: task with the latency
1402  * @cpu: the cpu of the buffer to copy.
1403  *
1404  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1405  */
1406 void
1407 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1408 {
1409         int ret;
1410
1411         if (tr->stop_count)
1412                 return;
1413
1414         WARN_ON_ONCE(!irqs_disabled());
1415         if (!tr->allocated_snapshot) {
1416                 /* Only the nop tracer should hit this when disabling */
1417                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1418                 return;
1419         }
1420
1421         arch_spin_lock(&tr->max_lock);
1422
1423         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1424
1425         if (ret == -EBUSY) {
1426                 /*
1427                  * We failed to swap the buffer due to a commit taking
1428                  * place on this CPU. We fail to record, but we reset
1429                  * the max trace buffer (no one writes directly to it)
1430                  * and flag that it failed.
1431                  */
1432                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1433                         "Failed to swap buffers due to commit in progress\n");
1434         }
1435
1436         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1437
1438         __update_max_tr(tr, tsk, cpu);
1439         arch_spin_unlock(&tr->max_lock);
1440 }
1441 #endif /* CONFIG_TRACER_MAX_TRACE */
1442
1443 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1444 {
1445         /* Iterators are static, they should be filled or empty */
1446         if (trace_buffer_iter(iter, iter->cpu_file))
1447                 return 0;
1448
1449         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1450                                 full);
1451 }
1452
1453 #ifdef CONFIG_FTRACE_STARTUP_TEST
1454 static bool selftests_can_run;
1455
1456 struct trace_selftests {
1457         struct list_head                list;
1458         struct tracer                   *type;
1459 };
1460
1461 static LIST_HEAD(postponed_selftests);
1462
1463 static int save_selftest(struct tracer *type)
1464 {
1465         struct trace_selftests *selftest;
1466
1467         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1468         if (!selftest)
1469                 return -ENOMEM;
1470
1471         selftest->type = type;
1472         list_add(&selftest->list, &postponed_selftests);
1473         return 0;
1474 }
1475
1476 static int run_tracer_selftest(struct tracer *type)
1477 {
1478         struct trace_array *tr = &global_trace;
1479         struct tracer *saved_tracer = tr->current_trace;
1480         int ret;
1481
1482         if (!type->selftest || tracing_selftest_disabled)
1483                 return 0;
1484
1485         /*
1486          * If a tracer registers early in boot up (before scheduling is
1487          * initialized and such), then do not run its selftests yet.
1488          * Instead, run it a little later in the boot process.
1489          */
1490         if (!selftests_can_run)
1491                 return save_selftest(type);
1492
1493         /*
1494          * Run a selftest on this tracer.
1495          * Here we reset the trace buffer, and set the current
1496          * tracer to be this tracer. The tracer can then run some
1497          * internal tracing to verify that everything is in order.
1498          * If we fail, we do not register this tracer.
1499          */
1500         tracing_reset_online_cpus(&tr->trace_buffer);
1501
1502         tr->current_trace = type;
1503
1504 #ifdef CONFIG_TRACER_MAX_TRACE
1505         if (type->use_max_tr) {
1506                 /* If we expanded the buffers, make sure the max is expanded too */
1507                 if (ring_buffer_expanded)
1508                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1509                                            RING_BUFFER_ALL_CPUS);
1510                 tr->allocated_snapshot = true;
1511         }
1512 #endif
1513
1514         /* the test is responsible for initializing and enabling */
1515         pr_info("Testing tracer %s: ", type->name);
1516         ret = type->selftest(type, tr);
1517         /* the test is responsible for resetting too */
1518         tr->current_trace = saved_tracer;
1519         if (ret) {
1520                 printk(KERN_CONT "FAILED!\n");
1521                 /* Add the warning after printing 'FAILED' */
1522                 WARN_ON(1);
1523                 return -1;
1524         }
1525         /* Only reset on passing, to avoid touching corrupted buffers */
1526         tracing_reset_online_cpus(&tr->trace_buffer);
1527
1528 #ifdef CONFIG_TRACER_MAX_TRACE
1529         if (type->use_max_tr) {
1530                 tr->allocated_snapshot = false;
1531
1532                 /* Shrink the max buffer again */
1533                 if (ring_buffer_expanded)
1534                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1535                                            RING_BUFFER_ALL_CPUS);
1536         }
1537 #endif
1538
1539         printk(KERN_CONT "PASSED\n");
1540         return 0;
1541 }
1542
1543 static __init int init_trace_selftests(void)
1544 {
1545         struct trace_selftests *p, *n;
1546         struct tracer *t, **last;
1547         int ret;
1548
1549         selftests_can_run = true;
1550
1551         mutex_lock(&trace_types_lock);
1552
1553         if (list_empty(&postponed_selftests))
1554                 goto out;
1555
1556         pr_info("Running postponed tracer tests:\n");
1557
1558         tracing_selftest_running = true;
1559         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1560                 ret = run_tracer_selftest(p->type);
1561                 /* If the test fails, then warn and remove from available_tracers */
1562                 if (ret < 0) {
1563                         WARN(1, "tracer: %s failed selftest, disabling\n",
1564                              p->type->name);
1565                         last = &trace_types;
1566                         for (t = trace_types; t; t = t->next) {
1567                                 if (t == p->type) {
1568                                         *last = t->next;
1569                                         break;
1570                                 }
1571                                 last = &t->next;
1572                         }
1573                 }
1574                 list_del(&p->list);
1575                 kfree(p);
1576         }
1577         tracing_selftest_running = false;
1578
1579  out:
1580         mutex_unlock(&trace_types_lock);
1581
1582         return 0;
1583 }
1584 core_initcall(init_trace_selftests);
1585 #else
1586 static inline int run_tracer_selftest(struct tracer *type)
1587 {
1588         return 0;
1589 }
1590 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1591
1592 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1593
1594 static void __init apply_trace_boot_options(void);
1595
1596 /**
1597  * register_tracer - register a tracer with the ftrace system.
1598  * @type: the plugin for the tracer
1599  *
1600  * Register a new plugin tracer.
1601  */
1602 int __init register_tracer(struct tracer *type)
1603 {
1604         struct tracer *t;
1605         int ret = 0;
1606
1607         if (!type->name) {
1608                 pr_info("Tracer must have a name\n");
1609                 return -1;
1610         }
1611
1612         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1613                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1614                 return -1;
1615         }
1616
1617         mutex_lock(&trace_types_lock);
1618
1619         tracing_selftest_running = true;
1620
1621         for (t = trace_types; t; t = t->next) {
1622                 if (strcmp(type->name, t->name) == 0) {
1623                         /* already found */
1624                         pr_info("Tracer %s already registered\n",
1625                                 type->name);
1626                         ret = -1;
1627                         goto out;
1628                 }
1629         }
1630
1631         if (!type->set_flag)
1632                 type->set_flag = &dummy_set_flag;
1633         if (!type->flags) {
1634                 /* allocate a dummy tracer_flags */
1635                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1636                 if (!type->flags) {
1637                         ret = -ENOMEM;
1638                         goto out;
1639                 }
1640                 type->flags->val = 0;
1641                 type->flags->opts = dummy_tracer_opt;
1642         } else
1643                 if (!type->flags->opts)
1644                         type->flags->opts = dummy_tracer_opt;
1645
1646         /* store the tracer for __set_tracer_option */
1647         type->flags->trace = type;
1648
1649         ret = run_tracer_selftest(type);
1650         if (ret < 0)
1651                 goto out;
1652
1653         type->next = trace_types;
1654         trace_types = type;
1655         add_tracer_options(&global_trace, type);
1656
1657  out:
1658         tracing_selftest_running = false;
1659         mutex_unlock(&trace_types_lock);
1660
1661         if (ret || !default_bootup_tracer)
1662                 goto out_unlock;
1663
1664         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1665                 goto out_unlock;
1666
1667         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1668         /* Do we want this tracer to start on bootup? */
1669         tracing_set_tracer(&global_trace, type->name);
1670         default_bootup_tracer = NULL;
1671
1672         apply_trace_boot_options();
1673
1674         /* disable other selftests, since this will break them. */
1675         tracing_selftest_disabled = true;
1676 #ifdef CONFIG_FTRACE_STARTUP_TEST
1677         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1678                type->name);
1679 #endif
1680
1681  out_unlock:
1682         return ret;
1683 }
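/*
 * Illustrative sketch only (not part of this file): a minimal tracer
 * plugin fills in a struct tracer and hands it to register_tracer()
 * from its own init code.  The names "mytrace", my_tracer_init() and
 * my_tracer_reset() below are made up for the example.
 *
 *	static struct tracer mytrace_tracer = {
 *		.name	= "mytrace",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_mytrace(void)
 *	{
 *		return register_tracer(&mytrace_tracer);
 *	}
 *	core_initcall(init_mytrace);
 */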
1684
1685 void tracing_reset(struct trace_buffer *buf, int cpu)
1686 {
1687         struct ring_buffer *buffer = buf->buffer;
1688
1689         if (!buffer)
1690                 return;
1691
1692         ring_buffer_record_disable(buffer);
1693
1694         /* Make sure all commits have finished */
1695         synchronize_sched();
1696         ring_buffer_reset_cpu(buffer, cpu);
1697
1698         ring_buffer_record_enable(buffer);
1699 }
1700
1701 void tracing_reset_online_cpus(struct trace_buffer *buf)
1702 {
1703         struct ring_buffer *buffer = buf->buffer;
1704         int cpu;
1705
1706         if (!buffer)
1707                 return;
1708
1709         ring_buffer_record_disable(buffer);
1710
1711         /* Make sure all commits have finished */
1712         synchronize_sched();
1713
1714         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1715
1716         for_each_online_cpu(cpu)
1717                 ring_buffer_reset_cpu(buffer, cpu);
1718
1719         ring_buffer_record_enable(buffer);
1720 }
1721
1722 /* Must have trace_types_lock held */
1723 void tracing_reset_all_online_cpus(void)
1724 {
1725         struct trace_array *tr;
1726
1727         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1728                 if (!tr->clear_trace)
1729                         continue;
1730                 tr->clear_trace = false;
1731                 tracing_reset_online_cpus(&tr->trace_buffer);
1732 #ifdef CONFIG_TRACER_MAX_TRACE
1733                 tracing_reset_online_cpus(&tr->max_buffer);
1734 #endif
1735         }
1736 }
1737
1738 /*
1739  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1740  * is the tgid last observed corresponding to pid=i.
1741  */
1742 static int *tgid_map;
1743
1744 /* The maximum valid index into tgid_map. */
1745 static size_t tgid_map_max;
1746
1747 #define SAVED_CMDLINES_DEFAULT 128
1748 #define NO_CMDLINE_MAP UINT_MAX
1749 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1750 struct saved_cmdlines_buffer {
1751         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1752         unsigned *map_cmdline_to_pid;
1753         unsigned cmdline_num;
1754         int cmdline_idx;
1755         char *saved_cmdlines;
1756 };
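/*
 * map_pid_to_cmdline[] maps a pid (masked with PID_MAX_DEFAULT - 1) to a
 * slot in saved_cmdlines[], and map_cmdline_to_pid[] remembers which pid
 * last claimed that slot, so lookups can detect when the slot has been
 * reused by a colliding pid.
 */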
1757 static struct saved_cmdlines_buffer *savedcmd;
1758
1759 static inline char *get_saved_cmdlines(int idx)
1760 {
1761         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1762 }
1763
1764 static inline void set_cmdline(int idx, const char *cmdline)
1765 {
1766         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1767 }
1768
1769 static int allocate_cmdlines_buffer(unsigned int val,
1770                                     struct saved_cmdlines_buffer *s)
1771 {
1772         s->map_cmdline_to_pid = kmalloc_array(val,
1773                                               sizeof(*s->map_cmdline_to_pid),
1774                                               GFP_KERNEL);
1775         if (!s->map_cmdline_to_pid)
1776                 return -ENOMEM;
1777
1778         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1779         if (!s->saved_cmdlines) {
1780                 kfree(s->map_cmdline_to_pid);
1781                 return -ENOMEM;
1782         }
1783
1784         s->cmdline_idx = 0;
1785         s->cmdline_num = val;
1786         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1787                sizeof(s->map_pid_to_cmdline));
1788         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1789                val * sizeof(*s->map_cmdline_to_pid));
1790
1791         return 0;
1792 }
1793
1794 static int trace_create_savedcmd(void)
1795 {
1796         int ret;
1797
1798         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1799         if (!savedcmd)
1800                 return -ENOMEM;
1801
1802         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1803         if (ret < 0) {
1804                 kfree(savedcmd);
1805                 savedcmd = NULL;
1806                 return -ENOMEM;
1807         }
1808
1809         return 0;
1810 }
1811
1812 int is_tracing_stopped(void)
1813 {
1814         return global_trace.stop_count;
1815 }
1816
1817 /**
1818  * tracing_start - quick start of the tracer
1819  *
1820  * If tracing is enabled but was stopped by tracing_stop,
1821  * this will start the tracer back up.
1822  */
1823 void tracing_start(void)
1824 {
1825         struct ring_buffer *buffer;
1826         unsigned long flags;
1827
1828         if (tracing_disabled)
1829                 return;
1830
1831         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1832         if (--global_trace.stop_count) {
1833                 if (global_trace.stop_count < 0) {
1834                         /* Someone screwed up their debugging */
1835                         WARN_ON_ONCE(1);
1836                         global_trace.stop_count = 0;
1837                 }
1838                 goto out;
1839         }
1840
1841         /* Prevent the buffers from switching */
1842         arch_spin_lock(&global_trace.max_lock);
1843
1844         buffer = global_trace.trace_buffer.buffer;
1845         if (buffer)
1846                 ring_buffer_record_enable(buffer);
1847
1848 #ifdef CONFIG_TRACER_MAX_TRACE
1849         buffer = global_trace.max_buffer.buffer;
1850         if (buffer)
1851                 ring_buffer_record_enable(buffer);
1852 #endif
1853
1854         arch_spin_unlock(&global_trace.max_lock);
1855
1856  out:
1857         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1858 }
1859
1860 static void tracing_start_tr(struct trace_array *tr)
1861 {
1862         struct ring_buffer *buffer;
1863         unsigned long flags;
1864
1865         if (tracing_disabled)
1866                 return;
1867
1868         /* If global, we need to also start the max tracer */
1869         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1870                 return tracing_start();
1871
1872         raw_spin_lock_irqsave(&tr->start_lock, flags);
1873
1874         if (--tr->stop_count) {
1875                 if (tr->stop_count < 0) {
1876                         /* Someone screwed up their debugging */
1877                         WARN_ON_ONCE(1);
1878                         tr->stop_count = 0;
1879                 }
1880                 goto out;
1881         }
1882
1883         buffer = tr->trace_buffer.buffer;
1884         if (buffer)
1885                 ring_buffer_record_enable(buffer);
1886
1887  out:
1888         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1889 }
1890
1891 /**
1892  * tracing_stop - quick stop of the tracer
1893  *
1894  * Light weight way to stop tracing. Use in conjunction with
1895  * tracing_start.
1896  */
1897 void tracing_stop(void)
1898 {
1899         struct ring_buffer *buffer;
1900         unsigned long flags;
1901
1902         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1903         if (global_trace.stop_count++)
1904                 goto out;
1905
1906         /* Prevent the buffers from switching */
1907         arch_spin_lock(&global_trace.max_lock);
1908
1909         buffer = global_trace.trace_buffer.buffer;
1910         if (buffer)
1911                 ring_buffer_record_disable(buffer);
1912
1913 #ifdef CONFIG_TRACER_MAX_TRACE
1914         buffer = global_trace.max_buffer.buffer;
1915         if (buffer)
1916                 ring_buffer_record_disable(buffer);
1917 #endif
1918
1919         arch_spin_unlock(&global_trace.max_lock);
1920
1921  out:
1922         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1923 }
1924
1925 static void tracing_stop_tr(struct trace_array *tr)
1926 {
1927         struct ring_buffer *buffer;
1928         unsigned long flags;
1929
1930         /* If global, we need to also stop the max tracer */
1931         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1932                 return tracing_stop();
1933
1934         raw_spin_lock_irqsave(&tr->start_lock, flags);
1935         if (tr->stop_count++)
1936                 goto out;
1937
1938         buffer = tr->trace_buffer.buffer;
1939         if (buffer)
1940                 ring_buffer_record_disable(buffer);
1941
1942  out:
1943         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1944 }
1945
1946 static int trace_save_cmdline(struct task_struct *tsk)
1947 {
1948         unsigned tpid, idx;
1949
1950         /* treat recording of idle task as a success */
1951         if (!tsk->pid)
1952                 return 1;
1953
1954         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1955
1956         /*
1957          * It's not the end of the world if we don't get
1958          * the lock, but we also don't want to spin
1959          * nor do we want to disable interrupts,
1960          * so if we miss here, then better luck next time.
1961          */
1962         if (!arch_spin_trylock(&trace_cmdline_lock))
1963                 return 0;
1964
1965         idx = savedcmd->map_pid_to_cmdline[tpid];
1966         if (idx == NO_CMDLINE_MAP) {
1967                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1968
1969                 savedcmd->map_pid_to_cmdline[tpid] = idx;
1970                 savedcmd->cmdline_idx = idx;
1971         }
1972
1973         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1974         set_cmdline(idx, tsk->comm);
1975
1976         arch_spin_unlock(&trace_cmdline_lock);
1977
1978         return 1;
1979 }
1980
1981 static void __trace_find_cmdline(int pid, char comm[])
1982 {
1983         unsigned map;
1984         int tpid;
1985
1986         if (!pid) {
1987                 strcpy(comm, "<idle>");
1988                 return;
1989         }
1990
1991         if (WARN_ON_ONCE(pid < 0)) {
1992                 strcpy(comm, "<XXX>");
1993                 return;
1994         }
1995
1996         tpid = pid & (PID_MAX_DEFAULT - 1);
1997         map = savedcmd->map_pid_to_cmdline[tpid];
1998         if (map != NO_CMDLINE_MAP) {
1999                 tpid = savedcmd->map_cmdline_to_pid[map];
2000                 if (tpid == pid) {
2001                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2002                         return;
2003                 }
2004         }
2005         strcpy(comm, "<...>");
2006 }
2007
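/*
 * trace_find_cmdline - copy the saved comm for @pid into @comm.
 *
 * Takes the cmdline lock with preemption disabled around the lookup.
 * If no comm was ever saved for @pid, @comm is set to "<...>".
 */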
2008 void trace_find_cmdline(int pid, char comm[])
2009 {
2010         preempt_disable();
2011         arch_spin_lock(&trace_cmdline_lock);
2012
2013         __trace_find_cmdline(pid, comm);
2014
2015         arch_spin_unlock(&trace_cmdline_lock);
2016         preempt_enable();
2017 }
2018
2019 static int *trace_find_tgid_ptr(int pid)
2020 {
2021         /*
2022          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2023          * if we observe a non-NULL tgid_map then we also observe the correct
2024          * tgid_map_max.
2025          */
2026         int *map = smp_load_acquire(&tgid_map);
2027
2028         if (unlikely(!map || pid > tgid_map_max))
2029                 return NULL;
2030
2031         return &map[pid];
2032 }
2033
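/*
 * trace_find_tgid - return the tgid last recorded for @pid, or 0 if none
 * was saved (or the tgid_map has not been allocated).
 */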
2034 int trace_find_tgid(int pid)
2035 {
2036         int *ptr = trace_find_tgid_ptr(pid);
2037
2038         return ptr ? *ptr : 0;
2039 }
2040
2041 static int trace_save_tgid(struct task_struct *tsk)
2042 {
2043         int *ptr;
2044
2045         /* treat recording of idle task as a success */
2046         if (!tsk->pid)
2047                 return 1;
2048
2049         ptr = trace_find_tgid_ptr(tsk->pid);
2050         if (!ptr)
2051                 return 0;
2052
2053         *ptr = tsk->tgid;
2054         return 1;
2055 }
2056
2057 static bool tracing_record_taskinfo_skip(int flags)
2058 {
2059         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2060                 return true;
2061         if (!__this_cpu_read(trace_taskinfo_save))
2062                 return true;
2063         return false;
2064 }
2065
2066 /**
2067  * tracing_record_taskinfo - record the task info of a task
2068  *
2069  * @task:  task to record
2070  * @flags: TRACE_RECORD_CMDLINE for recording comm
2071  *         TRACE_RECORD_TGID for recording tgid
2072  */
2073 void tracing_record_taskinfo(struct task_struct *task, int flags)
2074 {
2075         bool done;
2076
2077         if (tracing_record_taskinfo_skip(flags))
2078                 return;
2079
2080         /*
2081          * Record as much task information as possible. If some fail, continue
2082          * to try to record the others.
2083          */
2084         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2085         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2086
2087         /* If recording any information failed, retry again soon. */
2088         if (!done)
2089                 return;
2090
2091         __this_cpu_write(trace_taskinfo_save, false);
2092 }
2093
2094 /**
2095  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2096  *
2097  * @prev:  previous task during sched_switch
2098  * @next:  next task during sched_switch
2099  * @flags: TRACE_RECORD_CMDLINE for recording comm
2100  *         TRACE_RECORD_TGID for recording tgid
2101  */
2102 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2103                                           struct task_struct *next, int flags)
2104 {
2105         bool done;
2106
2107         if (tracing_record_taskinfo_skip(flags))
2108                 return;
2109
2110         /*
2111          * Record as much task information as possible. If some fail, continue
2112          * to try to record the others.
2113          */
2114         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2115         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2116         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2117         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2118
2119         /* If recording any information failed, retry again soon. */
2120         if (!done)
2121                 return;
2122
2123         __this_cpu_write(trace_taskinfo_save, false);
2124 }
2125
2126 /* Helpers to record a specific task information */
2127 void tracing_record_cmdline(struct task_struct *task)
2128 {
2129         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2130 }
2131
2132 void tracing_record_tgid(struct task_struct *task)
2133 {
2134         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2135 }
2136
2137 /*
2138  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2139  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2140  * simplifies those functions and keeps them in sync.
2141  */
2142 enum print_line_t trace_handle_return(struct trace_seq *s)
2143 {
2144         return trace_seq_has_overflowed(s) ?
2145                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2146 }
2147 EXPORT_SYMBOL_GPL(trace_handle_return);
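/*
 * A typical use (sketch only; names are made up) in a trace_event output
 * callback:
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */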
2148
2149 void
2150 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2151                              int pc)
2152 {
2153         struct task_struct *tsk = current;
2154
2155         entry->preempt_count            = pc & 0xff;
2156         entry->pid                      = (tsk) ? tsk->pid : 0;
2157         entry->flags =
2158 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2159                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2160 #else
2161                 TRACE_FLAG_IRQS_NOSUPPORT |
2162 #endif
2163                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2164                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2165                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2166                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2167                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2168 }
2169 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2170
2171 struct ring_buffer_event *
2172 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2173                           int type,
2174                           unsigned long len,
2175                           unsigned long flags, int pc)
2176 {
2177         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2178 }
2179
2180 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2181 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2182 static int trace_buffered_event_ref;
2183
2184 /**
2185  * trace_buffered_event_enable - enable buffering events
2186  *
2187  * When events are being filtered, it is quicker to use a temporary
2188  * buffer to write the event data into if there's a likely chance
2189  * that it will not be committed. The discard of the ring buffer
2190  * is not as fast as committing, and is much slower than copying
2191  * a commit.
2192  *
2193  * When an event is to be filtered, allocate per cpu buffers to
2194  * write the event data into, and if the event is filtered and discarded
2195  * it is simply dropped, otherwise, the entire data is to be committed
2196  * in one shot.
2197  */
2198 void trace_buffered_event_enable(void)
2199 {
2200         struct ring_buffer_event *event;
2201         struct page *page;
2202         int cpu;
2203
2204         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2205
2206         if (trace_buffered_event_ref++)
2207                 return;
2208
2209         for_each_tracing_cpu(cpu) {
2210                 page = alloc_pages_node(cpu_to_node(cpu),
2211                                         GFP_KERNEL | __GFP_NORETRY, 0);
2212                 if (!page)
2213                         goto failed;
2214
2215                 event = page_address(page);
2216                 memset(event, 0, sizeof(*event));
2217
2218                 per_cpu(trace_buffered_event, cpu) = event;
2219
2220                 preempt_disable();
2221                 if (cpu == smp_processor_id() &&
2222                     this_cpu_read(trace_buffered_event) !=
2223                     per_cpu(trace_buffered_event, cpu))
2224                         WARN_ON_ONCE(1);
2225                 preempt_enable();
2226         }
2227
2228         return;
2229  failed:
2230         trace_buffered_event_disable();
2231 }
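/*
 * Note: callers must hold event_mutex (checked above) and must balance
 * every trace_buffered_event_enable() with a later
 * trace_buffered_event_disable(); the per-cpu pages stay allocated until
 * the reference count drops back to zero.
 */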
2232
2233 static void enable_trace_buffered_event(void *data)
2234 {
2235         /* Probably not needed, but do it anyway */
2236         smp_rmb();
2237         this_cpu_dec(trace_buffered_event_cnt);
2238 }
2239
2240 static void disable_trace_buffered_event(void *data)
2241 {
2242         this_cpu_inc(trace_buffered_event_cnt);
2243 }
2244
2245 /**
2246  * trace_buffered_event_disable - disable buffering events
2247  *
2248  * When a filter is removed, it is faster to not use the buffered
2249  * events, and to commit directly into the ring buffer. Free up
2250  * the temp buffers when there are no more users. This requires
2251  * special synchronization with current events.
2252  */
2253 void trace_buffered_event_disable(void)
2254 {
2255         int cpu;
2256
2257         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2258
2259         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2260                 return;
2261
2262         if (--trace_buffered_event_ref)
2263                 return;
2264
2265         preempt_disable();
2266         /* For each CPU, set the buffer as used. */
2267         smp_call_function_many(tracing_buffer_mask,
2268                                disable_trace_buffered_event, NULL, 1);
2269         preempt_enable();
2270
2271         /* Wait for all current users to finish */
2272         synchronize_sched();
2273
2274         for_each_tracing_cpu(cpu) {
2275                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2276                 per_cpu(trace_buffered_event, cpu) = NULL;
2277         }
2278         /*
2279          * Make sure trace_buffered_event is NULL before clearing
2280          * trace_buffered_event_cnt.
2281          */
2282         smp_wmb();
2283
2284         preempt_disable();
2285         /* Do the work on each cpu */
2286         smp_call_function_many(tracing_buffer_mask,
2287                                enable_trace_buffered_event, NULL, 1);
2288         preempt_enable();
2289 }
2290
2291 static struct ring_buffer *temp_buffer;
2292
2293 struct ring_buffer_event *
2294 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2295                           struct trace_event_file *trace_file,
2296                           int type, unsigned long len,
2297                           unsigned long flags, int pc)
2298 {
2299         struct ring_buffer_event *entry;
2300         int val;
2301
2302         *current_rb = trace_file->tr->trace_buffer.buffer;
2303
2304         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2305              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2306             (entry = this_cpu_read(trace_buffered_event))) {
2307                 /* Try to use the per cpu buffer first */
2308                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2309                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2310                         trace_event_setup(entry, type, flags, pc);
2311                         entry->array[0] = len;
2312                         return entry;
2313                 }
2314                 this_cpu_dec(trace_buffered_event_cnt);
2315         }
2316
2317         entry = __trace_buffer_lock_reserve(*current_rb,
2318                                             type, len, flags, pc);
2319         /*
2320          * If tracing is off, but we have triggers enabled
2321          * we still need to look at the event data. Use the temp_buffer
2322          * to store the trace event for the trigger to use. It's recursion
2323          * safe and will not be recorded anywhere.
2324          */
2325         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2326                 *current_rb = temp_buffer;
2327                 entry = __trace_buffer_lock_reserve(*current_rb,
2328                                                     type, len, flags, pc);
2329         }
2330         return entry;
2331 }
2332 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2333
2334 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2335 static DEFINE_MUTEX(tracepoint_printk_mutex);
2336
2337 static void output_printk(struct trace_event_buffer *fbuffer)
2338 {
2339         struct trace_event_call *event_call;
2340         struct trace_event *event;
2341         unsigned long flags;
2342         struct trace_iterator *iter = tracepoint_print_iter;
2343
2344         /* We should never get here if iter is NULL */
2345         if (WARN_ON_ONCE(!iter))
2346                 return;
2347
2348         event_call = fbuffer->trace_file->event_call;
2349         if (!event_call || !event_call->event.funcs ||
2350             !event_call->event.funcs->trace)
2351                 return;
2352
2353         event = &fbuffer->trace_file->event_call->event;
2354
2355         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2356         trace_seq_init(&iter->seq);
2357         iter->ent = fbuffer->entry;
2358         event_call->event.funcs->trace(iter, 0, event);
2359         trace_seq_putc(&iter->seq, 0);
2360         printk("%s", iter->seq.buffer);
2361
2362         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2363 }
2364
2365 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2366                              void __user *buffer, size_t *lenp,
2367                              loff_t *ppos)
2368 {
2369         int save_tracepoint_printk;
2370         int ret;
2371
2372         mutex_lock(&tracepoint_printk_mutex);
2373         save_tracepoint_printk = tracepoint_printk;
2374
2375         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2376
2377         /*
2378          * This will force exiting early, as tracepoint_printk
2379          * is always zero when tracepoint_print_iter is not allocated
2380          */
2381         if (!tracepoint_print_iter)
2382                 tracepoint_printk = 0;
2383
2384         if (save_tracepoint_printk == tracepoint_printk)
2385                 goto out;
2386
2387         if (tracepoint_printk)
2388                 static_key_enable(&tracepoint_printk_key.key);
2389         else
2390                 static_key_disable(&tracepoint_printk_key.key);
2391
2392  out:
2393         mutex_unlock(&tracepoint_printk_mutex);
2394
2395         return ret;
2396 }
2397
2398 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2399 {
2400         if (static_key_false(&tracepoint_printk_key.key))
2401                 output_printk(fbuffer);
2402
2403         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2404                                     fbuffer->event, fbuffer->entry,
2405                                     fbuffer->flags, fbuffer->pc);
2406 }
2407 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2408
2409 /*
2410  * Skip 3:
2411  *
2412  *   trace_buffer_unlock_commit_regs()
2413  *   trace_event_buffer_commit()
2414  *   trace_event_raw_event_xxx()
2415  */
2416 # define STACK_SKIP 3
2417
2418 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2419                                      struct ring_buffer *buffer,
2420                                      struct ring_buffer_event *event,
2421                                      unsigned long flags, int pc,
2422                                      struct pt_regs *regs)
2423 {
2424         __buffer_unlock_commit(buffer, event);
2425
2426         /*
2427          * If regs is not set, then skip the necessary functions.
2428          * Note, we can still get here via blktrace, wakeup tracer
2429          * and mmiotrace, but that's ok if they lose a function or
2430          * two. They are not that meaningful.
2431          */
2432         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2433         ftrace_trace_userstack(tr, buffer, flags, pc);
2434 }
2435
2436 /*
2437  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2438  */
2439 void
2440 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2441                                    struct ring_buffer_event *event)
2442 {
2443         __buffer_unlock_commit(buffer, event);
2444 }
2445
2446 static void
2447 trace_process_export(struct trace_export *export,
2448                struct ring_buffer_event *event)
2449 {
2450         struct trace_entry *entry;
2451         unsigned int size = 0;
2452
2453         entry = ring_buffer_event_data(event);
2454         size = ring_buffer_event_length(event);
2455         export->write(export, entry, size);
2456 }
2457
2458 static DEFINE_MUTEX(ftrace_export_lock);
2459
2460 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2461
2462 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2463
2464 static inline void ftrace_exports_enable(void)
2465 {
2466         static_branch_enable(&ftrace_exports_enabled);
2467 }
2468
2469 static inline void ftrace_exports_disable(void)
2470 {
2471         static_branch_disable(&ftrace_exports_enabled);
2472 }
2473
2474 void ftrace_exports(struct ring_buffer_event *event)
2475 {
2476         struct trace_export *export;
2477
2478         preempt_disable_notrace();
2479
2480         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2481         while (export) {
2482                 trace_process_export(export, event);
2483                 export = rcu_dereference_raw_notrace(export->next);
2484         }
2485
2486         preempt_enable_notrace();
2487 }
2488
2489 static inline void
2490 add_trace_export(struct trace_export **list, struct trace_export *export)
2491 {
2492         rcu_assign_pointer(export->next, *list);
2493         /*
2494          * We are inserting export into the list, but another
2495          * CPU might be walking that list. We need to make sure
2496          * the export->next pointer is valid before another CPU sees
2497          * the export pointer inserted into the list.
2498          */
2499         rcu_assign_pointer(*list, export);
2500 }
2501
2502 static inline int
2503 rm_trace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505         struct trace_export **p;
2506
2507         for (p = list; *p != NULL; p = &(*p)->next)
2508                 if (*p == export)
2509                         break;
2510
2511         if (*p != export)
2512                 return -1;
2513
2514         rcu_assign_pointer(*p, (*p)->next);
2515
2516         return 0;
2517 }
2518
2519 static inline void
2520 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2521 {
2522         if (*list == NULL)
2523                 ftrace_exports_enable();
2524
2525         add_trace_export(list, export);
2526 }
2527
2528 static inline int
2529 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2530 {
2531         int ret;
2532
2533         ret = rm_trace_export(list, export);
2534         if (*list == NULL)
2535                 ftrace_exports_disable();
2536
2537         return ret;
2538 }
2539
2540 int register_ftrace_export(struct trace_export *export)
2541 {
2542         if (WARN_ON_ONCE(!export->write))
2543                 return -1;
2544
2545         mutex_lock(&ftrace_export_lock);
2546
2547         add_ftrace_export(&ftrace_exports_list, export);
2548
2549         mutex_unlock(&ftrace_export_lock);
2550
2551         return 0;
2552 }
2553 EXPORT_SYMBOL_GPL(register_ftrace_export);
2554
2555 int unregister_ftrace_export(struct trace_export *export)
2556 {
2557         int ret;
2558
2559         mutex_lock(&ftrace_export_lock);
2560
2561         ret = rm_ftrace_export(&ftrace_exports_list, export);
2562
2563         mutex_unlock(&ftrace_export_lock);
2564
2565         return ret;
2566 }
2567 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
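/*
 * Example (sketch only; the names are made up): an exporter supplies a
 * write() callback and registers itself to receive the raw data of every
 * function trace event committed while exporting is enabled.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... push the raw entry to an out-of-band channel ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */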
2568
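/*
 * trace_function - record a single function entry (ip and its caller
 * parent_ip) into the ring buffer of @tr, honouring event filters and
 * any registered ftrace exports.
 */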
2569 void
2570 trace_function(struct trace_array *tr,
2571                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2572                int pc)
2573 {
2574         struct trace_event_call *call = &event_function;
2575         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2576         struct ring_buffer_event *event;
2577         struct ftrace_entry *entry;
2578
2579         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2580                                             flags, pc);
2581         if (!event)
2582                 return;
2583         entry   = ring_buffer_event_data(event);
2584         entry->ip                       = ip;
2585         entry->parent_ip                = parent_ip;
2586
2587         if (!call_filter_check_discard(call, entry, buffer, event)) {
2588                 if (static_branch_unlikely(&ftrace_exports_enabled))
2589                         ftrace_exports(event);
2590                 __buffer_unlock_commit(buffer, event);
2591         }
2592 }
2593
2594 #ifdef CONFIG_STACKTRACE
2595
2596 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2597 struct ftrace_stack {
2598         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2599 };
2600
2601 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2602 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2603
2604 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2605                                  unsigned long flags,
2606                                  int skip, int pc, struct pt_regs *regs)
2607 {
2608         struct trace_event_call *call = &event_kernel_stack;
2609         struct ring_buffer_event *event;
2610         struct stack_entry *entry;
2611         struct stack_trace trace;
2612         int use_stack;
2613         int size = FTRACE_STACK_ENTRIES;
2614
2615         trace.nr_entries        = 0;
2616         trace.skip              = skip;
2617
2618         /*
2619          * Add one, for this function and the call to save_stack_trace().
2620          * If regs is set, then these functions will not be in the way.
2621          */
2622 #ifndef CONFIG_UNWINDER_ORC
2623         if (!regs)
2624                 trace.skip++;
2625 #endif
2626
2627         /*
2628          * Since events can happen in NMIs, there's no safe way to
2629          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2630          * or NMI comes in, it will just have to use the default
2631          * FTRACE_STACK_ENTRIES.
2632          */
2633         preempt_disable_notrace();
2634
2635         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2636         /*
2637          * We don't need any atomic variables, just a barrier.
2638          * If an interrupt comes in, we don't care, because it would
2639          * have exited and put the counter back to what we want.
2640          * We just need a barrier to keep gcc from moving things
2641          * around.
2642          */
2643         barrier();
2644         if (use_stack == 1) {
2645                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2646                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2647
2648                 if (regs)
2649                         save_stack_trace_regs(regs, &trace);
2650                 else
2651                         save_stack_trace(&trace);
2652
2653                 if (trace.nr_entries > size)
2654                         size = trace.nr_entries;
2655         } else
2656                 /* From now on, use_stack is a boolean */
2657                 use_stack = 0;
2658
2659         size *= sizeof(unsigned long);
2660
2661         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2662                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2663                                     flags, pc);
2664         if (!event)
2665                 goto out;
2666         entry = ring_buffer_event_data(event);
2667
2668         memset(&entry->caller, 0, size);
2669
2670         if (use_stack)
2671                 memcpy(&entry->caller, trace.entries,
2672                        trace.nr_entries * sizeof(unsigned long));
2673         else {
2674                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2675                 trace.entries           = entry->caller;
2676                 if (regs)
2677                         save_stack_trace_regs(regs, &trace);
2678                 else
2679                         save_stack_trace(&trace);
2680         }
2681
2682         entry->size = trace.nr_entries;
2683
2684         if (!call_filter_check_discard(call, entry, buffer, event))
2685                 __buffer_unlock_commit(buffer, event);
2686
2687  out:
2688         /* Again, don't let gcc optimize things here */
2689         barrier();
2690         __this_cpu_dec(ftrace_stack_reserve);
2691         preempt_enable_notrace();
2692
2693 }
2694
2695 static inline void ftrace_trace_stack(struct trace_array *tr,
2696                                       struct ring_buffer *buffer,
2697                                       unsigned long flags,
2698                                       int skip, int pc, struct pt_regs *regs)
2699 {
2700         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2701                 return;
2702
2703         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2704 }
2705
2706 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2707                    int pc)
2708 {
2709         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2710
2711         if (rcu_is_watching()) {
2712                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2713                 return;
2714         }
2715
2716         /*
2717          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2718          * but if the above rcu_is_watching() failed, then the NMI
2719          * triggered someplace critical, and rcu_irq_enter() should
2720          * not be called from NMI.
2721          */
2722         if (unlikely(in_nmi()))
2723                 return;
2724
2725         rcu_irq_enter_irqson();
2726         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2727         rcu_irq_exit_irqson();
2728 }
2729
2730 /**
2731  * trace_dump_stack - record a stack back trace in the trace buffer
2732  * @skip: Number of functions to skip (helper handlers)
2733  */
2734 void trace_dump_stack(int skip)
2735 {
2736         unsigned long flags;
2737
2738         if (tracing_disabled || tracing_selftest_running)
2739                 return;
2740
2741         local_save_flags(flags);
2742
2743 #ifndef CONFIG_UNWINDER_ORC
2744         /* Skip 1 to skip this function. */
2745         skip++;
2746 #endif
2747         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2748                              flags, skip, preempt_count(), NULL);
2749 }
2750
2751 static DEFINE_PER_CPU(int, user_stack_count);
2752
2753 void
2754 ftrace_trace_userstack(struct trace_array *tr,
2755                        struct ring_buffer *buffer, unsigned long flags, int pc)
2756 {
2757         struct trace_event_call *call = &event_user_stack;
2758         struct ring_buffer_event *event;
2759         struct userstack_entry *entry;
2760         struct stack_trace trace;
2761
2762         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2763                 return;
2764
2765         /*
2766          * NMIs cannot handle page faults, even with fixups.
2767          * Saving the user stack can (and often does) fault.
2768          */
2769         if (unlikely(in_nmi()))
2770                 return;
2771
2772         /*
2773          * prevent recursion, since the user stack tracing may
2774          * trigger other kernel events.
2775          */
2776         preempt_disable();
2777         if (__this_cpu_read(user_stack_count))
2778                 goto out;
2779
2780         __this_cpu_inc(user_stack_count);
2781
2782         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2783                                             sizeof(*entry), flags, pc);
2784         if (!event)
2785                 goto out_drop_count;
2786         entry   = ring_buffer_event_data(event);
2787
2788         entry->tgid             = current->tgid;
2789         memset(&entry->caller, 0, sizeof(entry->caller));
2790
2791         trace.nr_entries        = 0;
2792         trace.max_entries       = FTRACE_STACK_ENTRIES;
2793         trace.skip              = 0;
2794         trace.entries           = entry->caller;
2795
2796         save_stack_trace_user(&trace);
2797         if (!call_filter_check_discard(call, entry, buffer, event))
2798                 __buffer_unlock_commit(buffer, event);
2799
2800  out_drop_count:
2801         __this_cpu_dec(user_stack_count);
2802  out:
2803         preempt_enable();
2804 }
2805
2806 #ifdef UNUSED
2807 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2808 {
2809         ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
2810 }
2811 #endif /* UNUSED */
2812
2813 #endif /* CONFIG_STACKTRACE */
2814
2815 /* created for use with alloc_percpu */
2816 struct trace_buffer_struct {
2817         int nesting;
2818         char buffer[4][TRACE_BUF_SIZE];
2819 };
2820
2821 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
2822
2823 /*
2824  * This allows for lockless recording.  If we're nested too deeply, then
2825  * this returns NULL.
2826  */
2827 static char *get_trace_buf(void)
2828 {
2829         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2830
2831         if (!trace_percpu_buffer || buffer->nesting >= 4)
2832                 return NULL;
2833
2834         buffer->nesting++;
2835
2836         /* Interrupts must see nesting incremented before we use the buffer */
2837         barrier();
2838         return &buffer->buffer[buffer->nesting - 1][0];
2839 }
2840
2841 static void put_trace_buf(void)
2842 {
2843         /* Don't let the decrement of nesting leak before this */
2844         barrier();
2845         this_cpu_dec(trace_percpu_buffer->nesting);
2846 }
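/*
 * The nesting counter above allows up to four nested users of the per-cpu
 * buffer on one CPU (for example, normal context interrupted by a softirq,
 * an irq and then an NMI) before get_trace_buf() starts returning NULL.
 */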
2847
2848 static int alloc_percpu_trace_buffer(void)
2849 {
2850         struct trace_buffer_struct __percpu *buffers;
2851
2852         buffers = alloc_percpu(struct trace_buffer_struct);
2853         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2854                 return -ENOMEM;
2855
2856         trace_percpu_buffer = buffers;
2857         return 0;
2858 }
2859
2860 static int buffers_allocated;
2861
2862 void trace_printk_init_buffers(void)
2863 {
2864         if (buffers_allocated)
2865                 return;
2866
2867         if (alloc_percpu_trace_buffer())
2868                 return;
2869
2870         /* trace_printk() is for debug use only. Don't use it in production. */
2871
2872         pr_warn("\n");
2873         pr_warn("**********************************************************\n");
2874         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2875         pr_warn("**                                                      **\n");
2876         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2877         pr_warn("**                                                      **\n");
2878         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2879         pr_warn("** unsafe for production use.                           **\n");
2880         pr_warn("**                                                      **\n");
2881         pr_warn("** If you see this message and you are not debugging    **\n");
2882         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2883         pr_warn("**                                                      **\n");
2884         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2885         pr_warn("**********************************************************\n");
2886
2887         /* Expand the buffers to set size */
2888         tracing_update_buffers();
2889
2890         buffers_allocated = 1;
2891
2892         /*
2893          * trace_printk_init_buffers() can be called by modules.
2894          * If that happens, then we need to start cmdline recording
2895          * directly here. If the global_trace.buffer is already
2896          * allocated here, then this was called by module code.
2897          */
2898         if (global_trace.trace_buffer.buffer)
2899                 tracing_start_cmdline_record();
2900 }
2901
2902 void trace_printk_start_comm(void)
2903 {
2904         /* Start tracing comms if trace printk is set */
2905         if (!buffers_allocated)
2906                 return;
2907         tracing_start_cmdline_record();
2908 }
2909
2910 static void trace_printk_start_stop_comm(int enabled)
2911 {
2912         if (!buffers_allocated)
2913                 return;
2914
2915         if (enabled)
2916                 tracing_start_cmdline_record();
2917         else
2918                 tracing_stop_cmdline_record();
2919 }
2920
2921 /**
2922  * trace_vbprintk - write a binary printk message into the tracing buffer
2923  *
2924  */
2925 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2926 {
2927         struct trace_event_call *call = &event_bprint;
2928         struct ring_buffer_event *event;
2929         struct ring_buffer *buffer;
2930         struct trace_array *tr = &global_trace;
2931         struct bprint_entry *entry;
2932         unsigned long flags;
2933         char *tbuffer;
2934         int len = 0, size, pc;
2935
2936         if (unlikely(tracing_selftest_running || tracing_disabled))
2937                 return 0;
2938
2939         /* Don't pollute graph traces with trace_vprintk internals */
2940         pause_graph_tracing();
2941
2942         pc = preempt_count();
2943         preempt_disable_notrace();
2944
2945         tbuffer = get_trace_buf();
2946         if (!tbuffer) {
2947                 len = 0;
2948                 goto out_nobuffer;
2949         }
2950
2951         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2952
2953         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2954                 goto out;
2955
2956         local_save_flags(flags);
2957         size = sizeof(*entry) + sizeof(u32) * len;
2958         buffer = tr->trace_buffer.buffer;
2959         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2960                                             flags, pc);
2961         if (!event)
2962                 goto out;
2963         entry = ring_buffer_event_data(event);
2964         entry->ip                       = ip;
2965         entry->fmt                      = fmt;
2966
2967         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2968         if (!call_filter_check_discard(call, entry, buffer, event)) {
2969                 __buffer_unlock_commit(buffer, event);
2970                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2971         }
2972
2973 out:
2974         put_trace_buf();
2975
2976 out_nobuffer:
2977         preempt_enable_notrace();
2978         unpause_graph_tracing();
2979
2980         return len;
2981 }
2982 EXPORT_SYMBOL_GPL(trace_vbprintk);
2983
2984 __printf(3, 0)
2985 static int
2986 __trace_array_vprintk(struct ring_buffer *buffer,
2987                       unsigned long ip, const char *fmt, va_list args)
2988 {
2989         struct trace_event_call *call = &event_print;
2990         struct ring_buffer_event *event;
2991         int len = 0, size, pc;
2992         struct print_entry *entry;
2993         unsigned long flags;
2994         char *tbuffer;
2995
2996         if (tracing_disabled || tracing_selftest_running)
2997                 return 0;
2998
2999         /* Don't pollute graph traces with trace_vprintk internals */
3000         pause_graph_tracing();
3001
3002         pc = preempt_count();
3003         preempt_disable_notrace();
3004
3005
3006         tbuffer = get_trace_buf();
3007         if (!tbuffer) {
3008                 len = 0;
3009                 goto out_nobuffer;
3010         }
3011
3012         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3013
3014         local_save_flags(flags);
3015         size = sizeof(*entry) + len + 1;
3016         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3017                                             flags, pc);
3018         if (!event)
3019                 goto out;
3020         entry = ring_buffer_event_data(event);
3021         entry->ip = ip;
3022
3023         memcpy(&entry->buf, tbuffer, len + 1);
3024         if (!call_filter_check_discard(call, entry, buffer, event)) {
3025                 __buffer_unlock_commit(buffer, event);
3026                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3027         }
3028
3029 out:
3030         put_trace_buf();
3031
3032 out_nobuffer:
3033         preempt_enable_notrace();
3034         unpause_graph_tracing();
3035
3036         return len;
3037 }
3038
3039 __printf(3, 0)
3040 int trace_array_vprintk(struct trace_array *tr,
3041                         unsigned long ip, const char *fmt, va_list args)
3042 {
3043         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3044 }
3045
3046 __printf(3, 0)
3047 int trace_array_printk(struct trace_array *tr,
3048                        unsigned long ip, const char *fmt, ...)
3049 {
3050         int ret;
3051         va_list ap;
3052
3053         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3054                 return 0;
3055
3056         if (!tr)
3057                 return -ENOENT;
3058
3059         va_start(ap, fmt);
3060         ret = trace_array_vprintk(tr, ip, fmt, ap);
3061         va_end(ap);
3062         return ret;
3063 }
3064
3065 __printf(3, 4)
3066 int trace_array_printk_buf(struct ring_buffer *buffer,
3067                            unsigned long ip, const char *fmt, ...)
3068 {
3069         int ret;
3070         va_list ap;
3071
3072         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3073                 return 0;
3074
3075         va_start(ap, fmt);
3076         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3077         va_end(ap);
3078         return ret;
3079 }
3080
3081 __printf(2, 0)
3082 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3083 {
3084         return trace_array_vprintk(&global_trace, ip, fmt, args);
3085 }
3086 EXPORT_SYMBOL_GPL(trace_vprintk);
3087
3088 static void trace_iterator_increment(struct trace_iterator *iter)
3089 {
3090         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3091
3092         iter->idx++;
3093         if (buf_iter)
3094                 ring_buffer_read(buf_iter, NULL);
3095 }
3096
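/*
 * peek_next_entry - return the next entry for @cpu without consuming it.
 * Also records its length in iter->ent_size (0 if the buffer is empty).
 */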
3097 static struct trace_entry *
3098 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3099                 unsigned long *lost_events)
3100 {
3101         struct ring_buffer_event *event;
3102         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3103
3104         if (buf_iter)
3105                 event = ring_buffer_iter_peek(buf_iter, ts);
3106         else
3107                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3108                                          lost_events);
3109
3110         if (event) {
3111                 iter->ent_size = ring_buffer_event_length(event);
3112                 return ring_buffer_event_data(event);
3113         }
3114         iter->ent_size = 0;
3115         return NULL;
3116 }
3117
3118 static struct trace_entry *
3119 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3120                   unsigned long *missing_events, u64 *ent_ts)
3121 {
3122         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3123         struct trace_entry *ent, *next = NULL;
3124         unsigned long lost_events = 0, next_lost = 0;
3125         int cpu_file = iter->cpu_file;
3126         u64 next_ts = 0, ts;
3127         int next_cpu = -1;
3128         int next_size = 0;
3129         int cpu;
3130
3131         /*
3132          * If we are in a per_cpu trace file, don't bother iterating over
3133          * all CPUs; just peek at that one CPU directly.
3134          */
3135         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3136                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3137                         return NULL;
3138                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3139                 if (ent_cpu)
3140                         *ent_cpu = cpu_file;
3141
3142                 return ent;
3143         }
3144
3145         for_each_tracing_cpu(cpu) {
3146
3147                 if (ring_buffer_empty_cpu(buffer, cpu))
3148                         continue;
3149
3150                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3151
3152                 /*
3153                  * Pick the entry with the smallest timestamp:
3154                  */
3155                 if (ent && (!next || ts < next_ts)) {
3156                         next = ent;
3157                         next_cpu = cpu;
3158                         next_ts = ts;
3159                         next_lost = lost_events;
3160                         next_size = iter->ent_size;
3161                 }
3162         }
3163
3164         iter->ent_size = next_size;
3165
3166         if (ent_cpu)
3167                 *ent_cpu = next_cpu;
3168
3169         if (ent_ts)
3170                 *ent_ts = next_ts;
3171
3172         if (missing_events)
3173                 *missing_events = next_lost;
3174
3175         return next;
3176 }
3177
3178 /* Find the next real entry, without updating the iterator itself */
3179 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3180                                           int *ent_cpu, u64 *ent_ts)
3181 {
3182         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3183 }
3184
3185 /* Find the next real entry, and increment the iterator to the next entry */
3186 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3187 {
3188         iter->ent = __find_next_entry(iter, &iter->cpu,
3189                                       &iter->lost_events, &iter->ts);
3190
3191         if (iter->ent)
3192                 trace_iterator_increment(iter);
3193
3194         return iter->ent ? iter : NULL;
3195 }
3196
3197 static void trace_consume(struct trace_iterator *iter)
3198 {
3199         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3200                             &iter->lost_events);
3201 }
3202
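/*
 * s_next() is the seq_file ->next callback for the trace file: it walks
 * the iterator forward until it reaches the entry at position *pos, or
 * returns NULL when the buffer runs out.
 */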
3203 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3204 {
3205         struct trace_iterator *iter = m->private;
3206         int i = (int)*pos;
3207         void *ent;
3208
3209         WARN_ON_ONCE(iter->leftover);
3210
3211         (*pos)++;
3212
3213         /* can't go backwards */
3214         if (iter->idx > i)
3215                 return NULL;
3216
3217         if (iter->idx < 0)
3218                 ent = trace_find_next_entry_inc(iter);
3219         else
3220                 ent = iter;
3221
3222         while (ent && iter->idx < i)
3223                 ent = trace_find_next_entry_inc(iter);
3224
3225         iter->pos = *pos;
3226
3227         return ent;
3228 }
3229
3230 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3231 {
3232         struct ring_buffer_event *event;
3233         struct ring_buffer_iter *buf_iter;
3234         unsigned long entries = 0;
3235         u64 ts;
3236
3237         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3238
3239         buf_iter = trace_buffer_iter(iter, cpu);
3240         if (!buf_iter)
3241                 return;
3242
3243         ring_buffer_iter_reset(buf_iter);
3244
3245         /*
3246          * With the max latency tracers, it can happen that a reset
3247          * never took place on a cpu. This is evident from the
3248          * timestamps being before the start of the buffer.
3249          */
3250         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3251                 if (ts >= iter->trace_buffer->time_start)
3252                         break;
3253                 entries++;
3254                 ring_buffer_read(buf_iter, NULL);
3255         }
3256
3257         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3258 }
3259
3260 /*
3261  * The current tracer is copied to avoid holding a global lock
3262  * all around.
3263  */
3264 static void *s_start(struct seq_file *m, loff_t *pos)
3265 {
3266         struct trace_iterator *iter = m->private;
3267         struct trace_array *tr = iter->tr;
3268         int cpu_file = iter->cpu_file;
3269         void *p = NULL;
3270         loff_t l = 0;
3271         int cpu;
3272
3273         /*
3274          * copy the tracer to avoid using a global lock all around.
3275          * iter->trace is a copy of current_trace; the name pointer may
3276          * be compared instead of using strcmp(), as iter->trace->name
3277          * will point to the same string as current_trace->name.
3278          */
3279         mutex_lock(&trace_types_lock);
3280         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3281                 *iter->trace = *tr->current_trace;
3282         mutex_unlock(&trace_types_lock);
3283
3284 #ifdef CONFIG_TRACER_MAX_TRACE
3285         if (iter->snapshot && iter->trace->use_max_tr)
3286                 return ERR_PTR(-EBUSY);
3287 #endif
3288
3289         if (*pos != iter->pos) {
3290                 iter->ent = NULL;
3291                 iter->cpu = 0;
3292                 iter->idx = -1;
3293
3294                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3295                         for_each_tracing_cpu(cpu)
3296                                 tracing_iter_reset(iter, cpu);
3297                 } else
3298                         tracing_iter_reset(iter, cpu_file);
3299
3300                 iter->leftover = 0;
3301                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3302                         ;
3303
3304         } else {
3305                 /*
3306                  * If we overflowed the seq_file before, then we want
3307                  * to just reuse the trace_seq buffer again.
3308                  */
3309                 if (iter->leftover)
3310                         p = iter;
3311                 else {
3312                         l = *pos - 1;
3313                         p = s_next(m, p, &l);
3314                 }
3315         }
3316
3317         trace_event_read_lock();
3318         trace_access_lock(cpu_file);
3319         return p;
3320 }
3321
3322 static void s_stop(struct seq_file *m, void *p)
3323 {
3324         struct trace_iterator *iter = m->private;
3325
3326 #ifdef CONFIG_TRACER_MAX_TRACE
3327         if (iter->snapshot && iter->trace->use_max_tr)
3328                 return;
3329 #endif
3330
3331         trace_access_unlock(iter->cpu_file);
3332         trace_event_read_unlock();
3333 }
3334
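/*
 * Sum the per cpu counters: @entries is what is currently in the
 * buffers, @total additionally includes events the ring buffer has
 * already overwritten (overruns), except on CPUs where entries were
 * skipped by tracing_iter_reset().
 */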
3335 static void
3336 get_total_entries(struct trace_buffer *buf,
3337                   unsigned long *total, unsigned long *entries)
3338 {
3339         unsigned long count;
3340         int cpu;
3341
3342         *total = 0;
3343         *entries = 0;
3344
3345         for_each_tracing_cpu(cpu) {
3346                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3347                 /*
3348                  * If this buffer has skipped entries, then we hold all
3349                  * entries for the trace and we need to ignore the
3350                  * ones before the time stamp.
3351                  */
3352                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3353                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3354                         /* total is the same as the entries */
3355                         *total += count;
3356                 } else
3357                         *total += count +
3358                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3359                 *entries += count;
3360         }
3361 }
3362
3363 static void print_lat_help_header(struct seq_file *m)
3364 {
3365         seq_puts(m, "#                  _------=> CPU#            \n"
3366                     "#                 / _-----=> irqs-off        \n"
3367                     "#                | / _----=> need-resched    \n"
3368                     "#                || / _---=> hardirq/softirq \n"
3369                     "#                ||| / _--=> preempt-depth   \n"
3370                     "#                |||| /     delay            \n"
3371                     "#  cmd     pid   ||||| time  |   caller      \n"
3372                     "#     \\   /      |||||  \\    |   /         \n");
3373 }
3374
3375 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3376 {
3377         unsigned long total;
3378         unsigned long entries;
3379
3380         get_total_entries(buf, &total, &entries);
3381         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3382                    entries, total, num_online_cpus());
3383         seq_puts(m, "#\n");
3384 }
3385
3386 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3387                                    unsigned int flags)
3388 {
3389         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3390
3391         print_event_info(buf, m);
3392
3393         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3394         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3395 }
3396
3397 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3398                                        unsigned int flags)
3399 {
3400         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3401         const char tgid_space[] = "          ";
3402         const char space[] = "  ";
3403
3404         print_event_info(buf, m);
3405
3406         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3407                    tgid ? tgid_space : space);
3408         seq_printf(m, "#                          %s / _----=> need-resched\n",
3409                    tgid ? tgid_space : space);
3410         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3411                    tgid ? tgid_space : space);
3412         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3413                    tgid ? tgid_space : space);
3414         seq_printf(m, "#                          %s||| /     delay\n",
3415                    tgid ? tgid_space : space);
3416         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3417                    tgid ? "   TGID   " : space);
3418         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3419                    tgid ? "     |    " : space);
3420 }
3421
3422 void
3423 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3424 {
3425         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3426         struct trace_buffer *buf = iter->trace_buffer;
3427         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3428         struct tracer *type = iter->trace;
3429         unsigned long entries;
3430         unsigned long total;
3431         const char *name = "preemption";
3432
3433         name = type->name;
3434
3435         get_total_entries(buf, &total, &entries);
3436
3437         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3438                    name, UTS_RELEASE);
3439         seq_puts(m, "# -----------------------------------"
3440                  "---------------------------------\n");
3441         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3442                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3443                    nsecs_to_usecs(data->saved_latency),
3444                    entries,
3445                    total,
3446                    buf->cpu,
3447 #if defined(CONFIG_PREEMPT_NONE)
3448                    "server",
3449 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3450                    "desktop",
3451 #elif defined(CONFIG_PREEMPT)
3452                    "preempt",
3453 #else
3454                    "unknown",
3455 #endif
3456                    /* These are reserved for later use */
3457                    0, 0, 0, 0);
3458 #ifdef CONFIG_SMP
3459         seq_printf(m, " #P:%d)\n", num_online_cpus());
3460 #else
3461         seq_puts(m, ")\n");
3462 #endif
3463         seq_puts(m, "#    -----------------\n");
3464         seq_printf(m, "#    | task: %.16s-%d "
3465                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3466                    data->comm, data->pid,
3467                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3468                    data->policy, data->rt_priority);
3469         seq_puts(m, "#    -----------------\n");
3470
3471         if (data->critical_start) {
3472                 seq_puts(m, "#  => started at: ");
3473                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3474                 trace_print_seq(m, &iter->seq);
3475                 seq_puts(m, "\n#  => ended at:   ");
3476                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3477                 trace_print_seq(m, &iter->seq);
3478                 seq_puts(m, "\n#\n");
3479         }
3480
3481         seq_puts(m, "#\n");
3482 }
3483
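/*
 * With the "annotate" option set and an overrun recorded at open time,
 * emit a "CPU %u buffer started" marker the first time an entry from a
 * given CPU shows up, so readers can tell where that CPU's data begins.
 */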
3484 static void test_cpu_buff_start(struct trace_iterator *iter)
3485 {
3486         struct trace_seq *s = &iter->seq;
3487         struct trace_array *tr = iter->tr;
3488
3489         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3490                 return;
3491
3492         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3493                 return;
3494
3495         if (cpumask_available(iter->started) &&
3496             cpumask_test_cpu(iter->cpu, iter->started))
3497                 return;
3498
3499         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3500                 return;
3501
3502         if (cpumask_available(iter->started))
3503                 cpumask_set_cpu(iter->cpu, iter->started);
3504
3505         /* Don't print started cpu buffer for the first entry of the trace */
3506         if (iter->idx > 1)
3507                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3508                                 iter->cpu);
3509 }
3510
3511 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3512 {
3513         struct trace_array *tr = iter->tr;
3514         struct trace_seq *s = &iter->seq;
3515         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3516         struct trace_entry *entry;
3517         struct trace_event *event;
3518
3519         entry = iter->ent;
3520
3521         test_cpu_buff_start(iter);
3522
3523         event = ftrace_find_event(entry->type);
3524
3525         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3526                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3527                         trace_print_lat_context(iter);
3528                 else
3529                         trace_print_context(iter);
3530         }
3531
3532         if (trace_seq_has_overflowed(s))
3533                 return TRACE_TYPE_PARTIAL_LINE;
3534
3535         if (event)
3536                 return event->funcs->trace(iter, sym_flags, event);
3537
3538         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3539
3540         return trace_handle_return(s);
3541 }
3542
3543 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3544 {
3545         struct trace_array *tr = iter->tr;
3546         struct trace_seq *s = &iter->seq;
3547         struct trace_entry *entry;
3548         struct trace_event *event;
3549
3550         entry = iter->ent;
3551
3552         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3553                 trace_seq_printf(s, "%d %d %llu ",
3554                                  entry->pid, iter->cpu, iter->ts);
3555
3556         if (trace_seq_has_overflowed(s))
3557                 return TRACE_TYPE_PARTIAL_LINE;
3558
3559         event = ftrace_find_event(entry->type);
3560         if (event)
3561                 return event->funcs->raw(iter, 0, event);
3562
3563         trace_seq_printf(s, "%d ?\n", entry->type);
3564
3565         return trace_handle_return(s);
3566 }
3567
3568 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3569 {
3570         struct trace_array *tr = iter->tr;
3571         struct trace_seq *s = &iter->seq;
3572         unsigned char newline = '\n';
3573         struct trace_entry *entry;
3574         struct trace_event *event;
3575
3576         entry = iter->ent;
3577
3578         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3579                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3580                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3581                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3582                 if (trace_seq_has_overflowed(s))
3583                         return TRACE_TYPE_PARTIAL_LINE;
3584         }
3585
3586         event = ftrace_find_event(entry->type);
3587         if (event) {
3588                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3589                 if (ret != TRACE_TYPE_HANDLED)
3590                         return ret;
3591         }
3592
3593         SEQ_PUT_FIELD(s, newline);
3594
3595         return trace_handle_return(s);
3596 }
3597
3598 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3599 {
3600         struct trace_array *tr = iter->tr;
3601         struct trace_seq *s = &iter->seq;
3602         struct trace_entry *entry;
3603         struct trace_event *event;
3604
3605         entry = iter->ent;
3606
3607         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3608                 SEQ_PUT_FIELD(s, entry->pid);
3609                 SEQ_PUT_FIELD(s, iter->cpu);
3610                 SEQ_PUT_FIELD(s, iter->ts);
3611                 if (trace_seq_has_overflowed(s))
3612                         return TRACE_TYPE_PARTIAL_LINE;
3613         }
3614
3615         event = ftrace_find_event(entry->type);
3616         return event ? event->funcs->binary(iter, 0, event) :
3617                 TRACE_TYPE_HANDLED;
3618 }
3619
3620 int trace_empty(struct trace_iterator *iter)
3621 {
3622         struct ring_buffer_iter *buf_iter;
3623         int cpu;
3624
3625         /* If we are looking at one CPU buffer, only check that one */
3626         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3627                 cpu = iter->cpu_file;
3628                 buf_iter = trace_buffer_iter(iter, cpu);
3629                 if (buf_iter) {
3630                         if (!ring_buffer_iter_empty(buf_iter))
3631                                 return 0;
3632                 } else {
3633                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3634                                 return 0;
3635                 }
3636                 return 1;
3637         }
3638
3639         for_each_tracing_cpu(cpu) {
3640                 buf_iter = trace_buffer_iter(iter, cpu);
3641                 if (buf_iter) {
3642                         if (!ring_buffer_iter_empty(buf_iter))
3643                                 return 0;
3644                 } else {
3645                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3646                                 return 0;
3647                 }
3648         }
3649
3650         return 1;
3651 }
3652
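/*
 * Format one entry into iter->seq. The tracer's own ->print_line()
 * gets the first chance; if it returns TRACE_TYPE_UNHANDLED the entry
 * falls through to the printk msg-only cases and then to the
 * bin/hex/raw/default formatters selected by the trace options.
 */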
3653 /*  Called with trace_event_read_lock() held. */
3654 enum print_line_t print_trace_line(struct trace_iterator *iter)
3655 {
3656         struct trace_array *tr = iter->tr;
3657         unsigned long trace_flags = tr->trace_flags;
3658         enum print_line_t ret;
3659
3660         if (iter->lost_events) {
3661                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3662                                  iter->cpu, iter->lost_events);
3663                 if (trace_seq_has_overflowed(&iter->seq))
3664                         return TRACE_TYPE_PARTIAL_LINE;
3665         }
3666
3667         if (iter->trace && iter->trace->print_line) {
3668                 ret = iter->trace->print_line(iter);
3669                 if (ret != TRACE_TYPE_UNHANDLED)
3670                         return ret;
3671         }
3672
3673         if (iter->ent->type == TRACE_BPUTS &&
3674                         trace_flags & TRACE_ITER_PRINTK &&
3675                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3676                 return trace_print_bputs_msg_only(iter);
3677
3678         if (iter->ent->type == TRACE_BPRINT &&
3679                         trace_flags & TRACE_ITER_PRINTK &&
3680                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3681                 return trace_print_bprintk_msg_only(iter);
3682
3683         if (iter->ent->type == TRACE_PRINT &&
3684                         trace_flags & TRACE_ITER_PRINTK &&
3685                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3686                 return trace_print_printk_msg_only(iter);
3687
3688         if (trace_flags & TRACE_ITER_BIN)
3689                 return print_bin_fmt(iter);
3690
3691         if (trace_flags & TRACE_ITER_HEX)
3692                 return print_hex_fmt(iter);
3693
3694         if (trace_flags & TRACE_ITER_RAW)
3695                 return print_raw_fmt(iter);
3696
3697         return print_trace_fmt(iter);
3698 }
3699
3700 void trace_latency_header(struct seq_file *m)
3701 {
3702         struct trace_iterator *iter = m->private;
3703         struct trace_array *tr = iter->tr;
3704
3705         /* print nothing if the buffers are empty */
3706         if (trace_empty(iter))
3707                 return;
3708
3709         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3710                 print_trace_header(m, iter);
3711
3712         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3713                 print_lat_help_header(m);
3714 }
3715
3716 void trace_default_header(struct seq_file *m)
3717 {
3718         struct trace_iterator *iter = m->private;
3719         struct trace_array *tr = iter->tr;
3720         unsigned long trace_flags = tr->trace_flags;
3721
3722         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3723                 return;
3724
3725         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3726                 /* print nothing if the buffers are empty */
3727                 if (trace_empty(iter))
3728                         return;
3729                 print_trace_header(m, iter);
3730                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3731                         print_lat_help_header(m);
3732         } else {
3733                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3734                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3735                                 print_func_help_header_irq(iter->trace_buffer,
3736                                                            m, trace_flags);
3737                         else
3738                                 print_func_help_header(iter->trace_buffer, m,
3739                                                        trace_flags);
3740                 }
3741         }
3742 }
3743
3744 static void test_ftrace_alive(struct seq_file *m)
3745 {
3746         if (!ftrace_is_dead())
3747                 return;
3748         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3749                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3750 }
3751
3752 #ifdef CONFIG_TRACER_MAX_TRACE
3753 static void show_snapshot_main_help(struct seq_file *m)
3754 {
3755         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3756                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3757                     "#                      Takes a snapshot of the main buffer.\n"
3758                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3759                     "#                      (Doesn't have to be '2'; works with any number that\n"
3760                     "#                       is not a '0' or '1')\n");
3761 }
3762
3763 static void show_snapshot_percpu_help(struct seq_file *m)
3764 {
3765         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3766 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3767         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3768                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3769 #else
3770         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3771                     "#                     Must use main snapshot file to allocate.\n");
3772 #endif
3773         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3774                     "#                      (Doesn't have to be '2'; works with any number that\n"
3775                     "#                       is not a '0' or '1')\n");
3776 }
3777
3778 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3779 {
3780         if (iter->tr->allocated_snapshot)
3781                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3782         else
3783                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3784
3785         seq_puts(m, "# Snapshot commands:\n");
3786         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3787                 show_snapshot_main_help(m);
3788         else
3789                 show_snapshot_percpu_help(m);
3790 }
3791 #else
3792 /* Should never be called */
3793 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3794 #endif
3795
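/*
 * seq_file ->show() callback. A NULL iter->ent means we are at the
 * top of the output, so print the header (or the snapshot help). If
 * the previous entry overflowed the seq_file buffer, re-emit the saved
 * trace_seq; otherwise format and print the current entry.
 */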
3796 static int s_show(struct seq_file *m, void *v)
3797 {
3798         struct trace_iterator *iter = v;
3799         int ret;
3800
3801         if (iter->ent == NULL) {
3802                 if (iter->tr) {
3803                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3804                         seq_puts(m, "#\n");
3805                         test_ftrace_alive(m);
3806                 }
3807                 if (iter->snapshot && trace_empty(iter))
3808                         print_snapshot_help(m, iter);
3809                 else if (iter->trace && iter->trace->print_header)
3810                         iter->trace->print_header(m);
3811                 else
3812                         trace_default_header(m);
3813
3814         } else if (iter->leftover) {
3815                 /*
3816                  * If we filled the seq_file buffer earlier, we
3817                  * want to just show it now.
3818                  */
3819                 ret = trace_print_seq(m, &iter->seq);
3820
3821                 /* ret should this time be zero, but you never know */
3822                 iter->leftover = ret;
3823
3824         } else {
3825                 print_trace_line(iter);
3826                 ret = trace_print_seq(m, &iter->seq);
3827                 /*
3828                  * If we overflow the seq_file buffer, then it will
3829                  * ask us for this data again at start up.
3830                  * Use that instead.
3831                  *  ret is 0 if seq_file write succeeded.
3832                  *        -1 otherwise.
3833                  */
3834                 iter->leftover = ret;
3835         }
3836
3837         return 0;
3838 }
3839
3840 /*
3841  * Should be used after trace_array_get(); trace_types_lock
3842  * ensures that i_cdev has already been initialized.
3843  */
3844 static inline int tracing_get_cpu(struct inode *inode)
3845 {
3846         if (inode->i_cdev) /* See trace_create_cpu_file() */
3847                 return (long)inode->i_cdev - 1;
3848         return RING_BUFFER_ALL_CPUS;
3849 }
3850
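/*
 * A read of the "trace" file is driven by the seq_file core, roughly:
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * (simplified; see fs/seq_file.c for the real loop, which also deals
 * with buffer overflow and partial reads)
 */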
3851 static const struct seq_operations tracer_seq_ops = {
3852         .start          = s_start,
3853         .next           = s_next,
3854         .stop           = s_stop,
3855         .show           = s_show,
3856 };
3857
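/*
 * Set up everything a read of the "trace" file needs: a private copy
 * of the current tracer, the cpumask used for buffer annotations, and
 * one ring buffer iterator per traced CPU. Unless this is the snapshot
 * file, tracing is stopped while the iterator exists and is restarted
 * from tracing_release().
 */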
3858 static struct trace_iterator *
3859 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3860 {
3861         struct trace_array *tr = inode->i_private;
3862         struct trace_iterator *iter;
3863         int cpu;
3864
3865         if (tracing_disabled)
3866                 return ERR_PTR(-ENODEV);
3867
3868         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3869         if (!iter)
3870                 return ERR_PTR(-ENOMEM);
3871
3872         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3873                                     GFP_KERNEL);
3874         if (!iter->buffer_iter)
3875                 goto release;
3876
3877         /*
3878          * We make a copy of the current tracer to avoid concurrent
3879          * changes to it while we are reading.
3880          */
3881         mutex_lock(&trace_types_lock);
3882         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3883         if (!iter->trace)
3884                 goto fail;
3885
3886         *iter->trace = *tr->current_trace;
3887
3888         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3889                 goto fail;
3890
3891         iter->tr = tr;
3892
3893 #ifdef CONFIG_TRACER_MAX_TRACE
3894         /* Currently only the top directory has a snapshot */
3895         if (tr->current_trace->print_max || snapshot)
3896                 iter->trace_buffer = &tr->max_buffer;
3897         else
3898 #endif
3899                 iter->trace_buffer = &tr->trace_buffer;
3900         iter->snapshot = snapshot;
3901         iter->pos = -1;
3902         iter->cpu_file = tracing_get_cpu(inode);
3903         mutex_init(&iter->mutex);
3904
3905         /* Notify the tracer early; before we stop tracing. */
3906         if (iter->trace && iter->trace->open)
3907                 iter->trace->open(iter);
3908
3909         /* Annotate start of buffers if we had overruns */
3910         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3911                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3912
3913         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3914         if (trace_clocks[tr->clock_id].in_ns)
3915                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3916
3917         /* stop the trace while dumping if we are not opening "snapshot" */
3918         if (!iter->snapshot)
3919                 tracing_stop_tr(tr);
3920
3921         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3922                 for_each_tracing_cpu(cpu) {
3923                         iter->buffer_iter[cpu] =
3924                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3925                                                          cpu, GFP_KERNEL);
3926                 }
3927                 ring_buffer_read_prepare_sync();
3928                 for_each_tracing_cpu(cpu) {
3929                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3930                         tracing_iter_reset(iter, cpu);
3931                 }
3932         } else {
3933                 cpu = iter->cpu_file;
3934                 iter->buffer_iter[cpu] =
3935                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3936                                                  cpu, GFP_KERNEL);
3937                 ring_buffer_read_prepare_sync();
3938                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3939                 tracing_iter_reset(iter, cpu);
3940         }
3941
3942         mutex_unlock(&trace_types_lock);
3943
3944         return iter;
3945
3946  fail:
3947         mutex_unlock(&trace_types_lock);
3948         kfree(iter->trace);
3949         kfree(iter->buffer_iter);
3950 release:
3951         seq_release_private(inode, file);
3952         return ERR_PTR(-ENOMEM);
3953 }
3954
3955 int tracing_open_generic(struct inode *inode, struct file *filp)
3956 {
3957         if (tracing_disabled)
3958                 return -ENODEV;
3959
3960         filp->private_data = inode->i_private;
3961         return 0;
3962 }
3963
3964 bool tracing_is_disabled(void)
3965 {
3966         return tracing_disabled ? true : false;
3967 }
3968
3969 /*
3970  * Open and update trace_array ref count.
3971  * Must have the current trace_array passed to it.
3972  */
3973 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3974 {
3975         struct trace_array *tr = inode->i_private;
3976
3977         if (tracing_disabled)
3978                 return -ENODEV;
3979
3980         if (trace_array_get(tr) < 0)
3981                 return -ENODEV;
3982
3983         filp->private_data = inode->i_private;
3984
3985         return 0;
3986 }
3987
3988 static int tracing_release(struct inode *inode, struct file *file)
3989 {
3990         struct trace_array *tr = inode->i_private;
3991         struct seq_file *m = file->private_data;
3992         struct trace_iterator *iter;
3993         int cpu;
3994
3995         if (!(file->f_mode & FMODE_READ)) {
3996                 trace_array_put(tr);
3997                 return 0;
3998         }
3999
4000         /* Writes do not use seq_file */
4001         iter = m->private;
4002         mutex_lock(&trace_types_lock);
4003
4004         for_each_tracing_cpu(cpu) {
4005                 if (iter->buffer_iter[cpu])
4006                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4007         }
4008
4009         if (iter->trace && iter->trace->close)
4010                 iter->trace->close(iter);
4011
4012         if (!iter->snapshot)
4013                 /* reenable tracing if it was previously enabled */
4014                 tracing_start_tr(tr);
4015
4016         __trace_array_put(tr);
4017
4018         mutex_unlock(&trace_types_lock);
4019
4020         mutex_destroy(&iter->mutex);
4021         free_cpumask_var(iter->started);
4022         kfree(iter->trace);
4023         kfree(iter->buffer_iter);
4024         seq_release_private(inode, file);
4025
4026         return 0;
4027 }
4028
4029 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4030 {
4031         struct trace_array *tr = inode->i_private;
4032
4033         trace_array_put(tr);
4034         return 0;
4035 }
4036
4037 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4038 {
4039         struct trace_array *tr = inode->i_private;
4040
4041         trace_array_put(tr);
4042
4043         return single_release(inode, file);
4044 }
4045
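/*
 * open() handler for the "trace" file. Opening for write with O_TRUNC
 * (e.g. "echo > trace" from a shell) clears the buffer; opening for
 * read builds the seq_file iterator via __tracing_open().
 */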
4046 static int tracing_open(struct inode *inode, struct file *file)
4047 {
4048         struct trace_array *tr = inode->i_private;
4049         struct trace_iterator *iter;
4050         int ret = 0;
4051
4052         if (trace_array_get(tr) < 0)
4053                 return -ENODEV;
4054
4055         /* If this file was open for write, then erase contents */
4056         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4057                 int cpu = tracing_get_cpu(inode);
4058                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4059
4060 #ifdef CONFIG_TRACER_MAX_TRACE
4061                 if (tr->current_trace->print_max)
4062                         trace_buf = &tr->max_buffer;
4063 #endif
4064
4065                 if (cpu == RING_BUFFER_ALL_CPUS)
4066                         tracing_reset_online_cpus(trace_buf);
4067                 else
4068                         tracing_reset(trace_buf, cpu);
4069         }
4070
4071         if (file->f_mode & FMODE_READ) {
4072                 iter = __tracing_open(inode, file, false);
4073                 if (IS_ERR(iter))
4074                         ret = PTR_ERR(iter);
4075                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4076                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4077         }
4078
4079         if (ret < 0)
4080                 trace_array_put(tr);
4081
4082         return ret;
4083 }
4084
4085 /*
4086  * Some tracers are not suitable for instance buffers.
4087  * A tracer is always available for the global array (toplevel)
4088  * or if it explicitly states that it is.
4089  */
4090 static bool
4091 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4092 {
4093         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4094 }
4095
4096 /* Find the next tracer that this trace array may use */
4097 static struct tracer *
4098 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4099 {
4100         while (t && !trace_ok_for_array(t, tr))
4101                 t = t->next;
4102
4103         return t;
4104 }
4105
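/*
 * seq_file iteration over the global list of registered tracers,
 * restricted to those this trace array may use. This is what the
 * "available_tracers" file prints.
 */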
4106 static void *
4107 t_next(struct seq_file *m, void *v, loff_t *pos)
4108 {
4109         struct trace_array *tr = m->private;
4110         struct tracer *t = v;
4111
4112         (*pos)++;
4113
4114         if (t)
4115                 t = get_tracer_for_array(tr, t->next);
4116
4117         return t;
4118 }
4119
4120 static void *t_start(struct seq_file *m, loff_t *pos)
4121 {
4122         struct trace_array *tr = m->private;
4123         struct tracer *t;
4124         loff_t l = 0;
4125
4126         mutex_lock(&trace_types_lock);
4127
4128         t = get_tracer_for_array(tr, trace_types);
4129         for (; t && l < *pos; t = t_next(m, t, &l))
4130                 ;
4131
4132         return t;
4133 }
4134
4135 static void t_stop(struct seq_file *m, void *p)
4136 {
4137         mutex_unlock(&trace_types_lock);
4138 }
4139
4140 static int t_show(struct seq_file *m, void *v)
4141 {
4142         struct tracer *t = v;
4143
4144         if (!t)
4145                 return 0;
4146
4147         seq_puts(m, t->name);
4148         if (t->next)
4149                 seq_putc(m, ' ');
4150         else
4151                 seq_putc(m, '\n');
4152
4153         return 0;
4154 }
4155
4156 static const struct seq_operations show_traces_seq_ops = {
4157         .start          = t_start,
4158         .next           = t_next,
4159         .stop           = t_stop,
4160         .show           = t_show,
4161 };
4162
4163 static int show_traces_open(struct inode *inode, struct file *file)
4164 {
4165         struct trace_array *tr = inode->i_private;
4166         struct seq_file *m;
4167         int ret;
4168
4169         if (tracing_disabled)
4170                 return -ENODEV;
4171
4172         if (trace_array_get(tr) < 0)
4173                 return -ENODEV;
4174
4175         ret = seq_open(file, &show_traces_seq_ops);
4176         if (ret) {
4177                 trace_array_put(tr);
4178                 return ret;
4179         }
4180
4181         m = file->private_data;
4182         m->private = tr;
4183
4184         return 0;
4185 }
4186
4187 static int show_traces_release(struct inode *inode, struct file *file)
4188 {
4189         struct trace_array *tr = inode->i_private;
4190
4191         trace_array_put(tr);
4192         return seq_release(inode, file);
4193 }
4194
4195 static ssize_t
4196 tracing_write_stub(struct file *filp, const char __user *ubuf,
4197                    size_t count, loff_t *ppos)
4198 {
4199         return count;
4200 }
4201
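/*
 * Files opened for read go through the normal seq_file lseek; files
 * opened write only have no seq_file, so just pin the position at 0.
 */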
4202 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4203 {
4204         int ret;
4205
4206         if (file->f_mode & FMODE_READ)
4207                 ret = seq_lseek(file, offset, whence);
4208         else
4209                 file->f_pos = ret = 0;
4210
4211         return ret;
4212 }
4213
4214 static const struct file_operations tracing_fops = {
4215         .open           = tracing_open,
4216         .read           = seq_read,
4217         .write          = tracing_write_stub,
4218         .llseek         = tracing_lseek,
4219         .release        = tracing_release,
4220 };
4221
4222 static const struct file_operations show_traces_fops = {
4223         .open           = show_traces_open,
4224         .read           = seq_read,
4225         .llseek         = seq_lseek,
4226         .release        = show_traces_release,
4227 };
4228
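/*
 * "tracing_cpumask" reads back the mask of traced CPUs in the usual
 * hex cpumask format (e.g. "f" while CPUs 0-3 are being traced).
 */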
4229 static ssize_t
4230 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4231                      size_t count, loff_t *ppos)
4232 {
4233         struct trace_array *tr = file_inode(filp)->i_private;
4234         char *mask_str;
4235         int len;
4236
4237         len = snprintf(NULL, 0, "%*pb\n",
4238                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4239         mask_str = kmalloc(len, GFP_KERNEL);
4240         if (!mask_str)
4241                 return -ENOMEM;
4242
4243         len = snprintf(mask_str, len, "%*pb\n",
4244                        cpumask_pr_args(tr->tracing_cpumask));
4245         if (len >= count) {
4246                 count = -EINVAL;
4247                 goto out_err;
4248         }
4249         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4250
4251 out_err:
4252         kfree(mask_str);
4253
4254         return count;
4255 }
4256
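/*
 * Writing a new mask (e.g. "echo 3 > tracing_cpumask" to trace only
 * CPUs 0 and 1) disables recording on CPUs that leave the mask and
 * re-enables it on CPUs that enter it, under tr->max_lock with
 * interrupts disabled so the switch appears atomic to the tracers.
 */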
4257 static ssize_t
4258 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4259                       size_t count, loff_t *ppos)
4260 {
4261         struct trace_array *tr = file_inode(filp)->i_private;
4262         cpumask_var_t tracing_cpumask_new;
4263         int err, cpu;
4264
4265         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4266                 return -ENOMEM;
4267
4268         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4269         if (err)
4270                 goto err_unlock;
4271
4272         local_irq_disable();
4273         arch_spin_lock(&tr->max_lock);
4274         for_each_tracing_cpu(cpu) {
4275                 /*
4276                  * Increase/decrease the disabled counter if we are
4277                  * about to flip a bit in the cpumask:
4278                  */
4279                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4280                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4281                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4282                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4283                 }
4284                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4285                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4286                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4287                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4288                 }
4289         }
4290         arch_spin_unlock(&tr->max_lock);
4291         local_irq_enable();
4292
4293         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4294         free_cpumask_var(tracing_cpumask_new);
4295
4296         return count;
4297
4298 err_unlock:
4299         free_cpumask_var(tracing_cpumask_new);
4300
4301         return err;
4302 }
4303
4304 static const struct file_operations tracing_cpumask_fops = {
4305         .open           = tracing_open_generic_tr,
4306         .read           = tracing_cpumask_read,
4307         .write          = tracing_cpumask_write,
4308         .release        = tracing_release_generic_tr,
4309         .llseek         = generic_file_llseek,
4310 };
4311
4312 static int tracing_trace_options_show(struct seq_file *m, void *v)
4313 {
4314         struct tracer_opt *trace_opts;
4315         struct trace_array *tr = m->private;
4316         u32 tracer_flags;
4317         int i;
4318
4319         mutex_lock(&trace_types_lock);
4320         tracer_flags = tr->current_trace->flags->val;
4321         trace_opts = tr->current_trace->flags->opts;
4322
4323         for (i = 0; trace_options[i]; i++) {
4324                 if (tr->trace_flags & (1 << i))
4325                         seq_printf(m, "%s\n", trace_options[i]);
4326                 else
4327                         seq_printf(m, "no%s\n", trace_options[i]);
4328         }
4329
4330         for (i = 0; trace_opts[i].name; i++) {
4331                 if (tracer_flags & trace_opts[i].bit)
4332                         seq_printf(m, "%s\n", trace_opts[i].name);
4333                 else
4334                         seq_printf(m, "no%s\n", trace_opts[i].name);
4335         }
4336         mutex_unlock(&trace_types_lock);
4337
4338         return 0;
4339 }
4340
4341 static int __set_tracer_option(struct trace_array *tr,
4342                                struct tracer_flags *tracer_flags,
4343                                struct tracer_opt *opts, int neg)
4344 {
4345         struct tracer *trace = tracer_flags->trace;
4346         int ret;
4347
4348         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4349         if (ret)
4350                 return ret;
4351
4352         if (neg)
4353                 tracer_flags->val &= ~opts->bit;
4354         else
4355                 tracer_flags->val |= opts->bit;
4356         return 0;
4357 }
4358
4359 /* Try to assign a tracer specific option */
4360 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4361 {
4362         struct tracer *trace = tr->current_trace;
4363         struct tracer_flags *tracer_flags = trace->flags;
4364         struct tracer_opt *opts = NULL;
4365         int i;
4366
4367         for (i = 0; tracer_flags->opts[i].name; i++) {
4368                 opts = &tracer_flags->opts[i];
4369
4370                 if (strcmp(cmp, opts->name) == 0)
4371                         return __set_tracer_option(tr, trace->flags, opts, neg);
4372         }
4373
4374         return -EINVAL;
4375 }
4376
4377 /* Some tracers require overwrite to stay enabled */
4378 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4379 {
4380         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4381                 return -1;
4382
4383         return 0;
4384 }
4385
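/*
 * Apply one core trace option bit. The current tracer may veto the
 * change through ->flag_changed(); a few options also carry side
 * effects handled here (cmdline/tgid recording, fork following,
 * ring buffer overwrite mode, trace_printk).
 */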
4386 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4387 {
4388         int *map;
4389
4390         if ((mask == TRACE_ITER_RECORD_TGID) ||
4391             (mask == TRACE_ITER_RECORD_CMD))
4392                 lockdep_assert_held(&event_mutex);
4393
4394         /* do nothing if flag is already set */
4395         if (!!(tr->trace_flags & mask) == !!enabled)
4396                 return 0;
4397
4398         /* Give the tracer a chance to approve the change */
4399         if (tr->current_trace->flag_changed)
4400                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4401                         return -EINVAL;
4402
4403         if (enabled)
4404                 tr->trace_flags |= mask;
4405         else
4406                 tr->trace_flags &= ~mask;
4407
4408         if (mask == TRACE_ITER_RECORD_CMD)
4409                 trace_event_enable_cmd_record(enabled);
4410
4411         if (mask == TRACE_ITER_RECORD_TGID) {
4412                 if (!tgid_map) {
4413                         tgid_map_max = pid_max;
4414                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4415                                        GFP_KERNEL);
4416
4417                         /*
4418                          * Pairs with smp_load_acquire() in
4419                          * trace_find_tgid_ptr() to ensure that if it observes
4420                          * the tgid_map we just allocated then it also observes
4421                          * the corresponding tgid_map_max value.
4422                          */
4423                         smp_store_release(&tgid_map, map);
4424                 }
4425                 if (!tgid_map) {
4426                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4427                         return -ENOMEM;
4428                 }
4429
4430                 trace_event_enable_tgid_record(enabled);
4431         }
4432
4433         if (mask == TRACE_ITER_EVENT_FORK)
4434                 trace_event_follow_fork(tr, enabled);
4435
4436         if (mask == TRACE_ITER_FUNC_FORK)
4437                 ftrace_pid_follow_fork(tr, enabled);
4438
4439         if (mask == TRACE_ITER_OVERWRITE) {
4440                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4441 #ifdef CONFIG_TRACER_MAX_TRACE
4442                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4443 #endif
4444         }
4445
4446         if (mask == TRACE_ITER_PRINTK) {
4447                 trace_printk_start_stop_comm(enabled);
4448                 trace_printk_control(enabled);
4449         }
4450
4451         return 0;
4452 }
4453
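/*
 * Apply one option token as written into "trace_options" (or passed
 * on the command line), e.g. "sym-offset" or "noirq-info"; a leading
 * "no" clears the option. Core flags are tried first, then the
 * tracer specific options.
 */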
4454 static int trace_set_options(struct trace_array *tr, char *option)
4455 {
4456         char *cmp;
4457         int neg = 0;
4458         int ret;
4459         size_t orig_len = strlen(option);
4460
4461         cmp = strstrip(option);
4462
4463         if (strncmp(cmp, "no", 2) == 0) {
4464                 neg = 1;
4465                 cmp += 2;
4466         }
4467
4468         mutex_lock(&event_mutex);
4469         mutex_lock(&trace_types_lock);
4470
4471         ret = match_string(trace_options, -1, cmp);
4472         /* If it is not a core option, try the tracer-specific options */
4473         if (ret < 0)
4474                 ret = set_tracer_option(tr, cmp, neg);
4475         else
4476                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4477
4478         mutex_unlock(&trace_types_lock);
4479         mutex_unlock(&event_mutex);
4480
4481         /*
4482          * If the first trailing whitespace is replaced with '\0' by strstrip,
4483          * turn it back into a space.
4484          */
4485         if (orig_len > strlen(option))
4486                 option[strlen(option)] = ' ';
4487
4488         return ret;
4489 }
4490
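/*
 * Walk the comma separated list saved from the "trace_options=" boot
 * parameter (e.g. trace_options=sym-offset,noirq-info) and apply each
 * entry to the global trace array.
 */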
4491 static void __init apply_trace_boot_options(void)
4492 {
4493         char *buf = trace_boot_options_buf;
4494         char *option;
4495
4496         while (true) {
4497                 option = strsep(&buf, ",");
4498
4499                 if (!option)
4500                         break;
4501
4502                 if (*option)
4503                         trace_set_options(&global_trace, option);
4504
4505                 /* Put back the comma to allow this to be called again */
4506                 if (buf)
4507                         *(buf - 1) = ',';
4508         }
4509 }
4510
4511 static ssize_t
4512 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4513                         size_t cnt, loff_t *ppos)
4514 {
4515         struct seq_file *m = filp->private_data;
4516         struct trace_array *tr = m->private;
4517         char buf[64];
4518         int ret;
4519
4520         if (cnt >= sizeof(buf))
4521                 return -EINVAL;
4522
4523         if (copy_from_user(buf, ubuf, cnt))
4524                 return -EFAULT;
4525
4526         buf[cnt] = 0;
4527
4528         ret = trace_set_options(tr, buf);
4529         if (ret < 0)
4530                 return ret;
4531
4532         *ppos += cnt;
4533
4534         return cnt;
4535 }
4536
4537 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4538 {
4539         struct trace_array *tr = inode->i_private;
4540         int ret;
4541
4542         if (tracing_disabled)
4543                 return -ENODEV;
4544
4545         if (trace_array_get(tr) < 0)
4546                 return -ENODEV;
4547
4548         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4549         if (ret < 0)
4550                 trace_array_put(tr);
4551
4552         return ret;
4553 }
4554
4555 static const struct file_operations tracing_iter_fops = {
4556         .open           = tracing_trace_options_open,
4557         .read           = seq_read,
4558         .llseek         = seq_lseek,
4559         .release        = tracing_single_release_tr,
4560         .write          = tracing_trace_options_write,
4561 };
4562
4563 static const char readme_msg[] =
4564         "tracing mini-HOWTO:\n\n"
4565         "# echo 0 > tracing_on : quick way to disable tracing\n"
4566         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4567         " Important files:\n"
4568         "  trace\t\t\t- The static contents of the buffer\n"
4569         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4570         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4571         "  current_tracer\t- function and latency tracers\n"
4572         "  available_tracers\t- list of configured tracers for current_tracer\n"
4573         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4574         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4575         "  trace_clock\t\t- change the clock used to order events\n"
4576         "       local:   Per cpu clock but may not be synced across CPUs\n"
4577         "      global:   Synced across CPUs but slows tracing down.\n"
4578         "     counter:   Not a clock, but just an increment\n"
4579         "      uptime:   Jiffy counter from time of boot\n"
4580         "        perf:   Same clock that perf events use\n"
4581 #ifdef CONFIG_X86_64
4582         "     x86-tsc:   TSC cycle counter\n"
4583 #endif
4584         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4585         "       delta:   Delta difference against a buffer-wide timestamp\n"
4586         "    absolute:   Absolute (standalone) timestamp\n"
4587         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4588         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4589         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4590         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4591         "\t\t\t  Remove sub-buffer with rmdir\n"
4592         "  trace_options\t\t- Set format or modify how tracing happens\n"
4593         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4594         "\t\t\t  option name\n"
4595         "  saved_cmdlines_size\t- echo the number of comm-pid pairs to cache in here\n"
4596 #ifdef CONFIG_DYNAMIC_FTRACE
4597         "\n  available_filter_functions - list of functions that can be filtered on\n"
4598         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4599         "\t\t\t  functions\n"
4600         "\t     accepts: func_full_name or glob-matching-pattern\n"
4601         "\t     modules: Can select a group via module\n"
4602         "\t      Format: :mod:<module-name>\n"
4603         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4604         "\t    triggers: a command to perform when function is hit\n"
4605         "\t      Format: <function>:<trigger>[:count]\n"
4606         "\t     trigger: traceon, traceoff\n"
4607         "\t\t      enable_event:<system>:<event>\n"
4608         "\t\t      disable_event:<system>:<event>\n"
4609 #ifdef CONFIG_STACKTRACE
4610         "\t\t      stacktrace\n"
4611 #endif
4612 #ifdef CONFIG_TRACER_SNAPSHOT
4613         "\t\t      snapshot\n"
4614 #endif
4615         "\t\t      dump\n"
4616         "\t\t      cpudump\n"
4617         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4618         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4619         "\t     The first one will disable tracing every time do_fault is hit\n"
4620         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4621         "\t       The first time do_trap is hit and it disables tracing, the\n"
4622         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4623         "\t       the counter will not decrement. It only decrements when the\n"
4624         "\t       trigger did work\n"
4625         "\t     To remove trigger without count:\n"
4626         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4627         "\t     To remove trigger with a count:\n"
4628         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4629         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4630         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4631         "\t    modules: Can select a group via module command :mod:\n"
4632         "\t    Does not accept triggers\n"
4633 #endif /* CONFIG_DYNAMIC_FTRACE */
4634 #ifdef CONFIG_FUNCTION_TRACER
4635         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4636         "\t\t    (function)\n"
4637 #endif
4638 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4639         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4640         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4641         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4642 #endif
4643 #ifdef CONFIG_TRACER_SNAPSHOT
4644         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4645         "\t\t\t  snapshot buffer. Read the contents for more\n"
4646         "\t\t\t  information\n"
4647 #endif
4648 #ifdef CONFIG_STACK_TRACER
4649         "  stack_trace\t\t- Shows the max stack trace when active\n"
4650         "  stack_max_size\t- Shows current max stack size that was traced\n"
4651         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4652         "\t\t\t  new trace)\n"
4653 #ifdef CONFIG_DYNAMIC_FTRACE
4654         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4655         "\t\t\t  traces\n"
4656 #endif
4657 #endif /* CONFIG_STACK_TRACER */
4658 #ifdef CONFIG_KPROBE_EVENTS
4659         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4660         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4661 #endif
4662 #ifdef CONFIG_UPROBE_EVENTS
4663         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4664         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4665 #endif
4666 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4667         "\t  accepts: event-definitions (one definition per line)\n"
4668         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4669         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4670         "\t           -:[<group>/]<event>\n"
4671 #ifdef CONFIG_KPROBE_EVENTS
4672         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4673         "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4674 #endif
4675 #ifdef CONFIG_UPROBE_EVENTS
4676         "\t    place: <path>:<offset>\n"
4677 #endif
4678         "\t     args: <name>=fetcharg[:type]\n"
4679         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4680         "\t           $stack<index>, $stack, $retval, $comm\n"
4681         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4682         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4683 #endif
4684         "  events/\t\t- Directory containing all trace event subsystems:\n"
4685         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4686         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4687         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4688         "\t\t\t  events\n"
4689         "      filter\t\t- If set, only events passing filter are traced\n"
4690         "  events/<system>/<event>/\t- Directory containing control files for\n"
4691         "\t\t\t  <event>:\n"
4692         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4693         "      filter\t\t- If set, only events passing filter are traced\n"
4694         "      trigger\t\t- If set, a command to perform when event is hit\n"
4695         "\t    Format: <trigger>[:count][if <filter>]\n"
4696         "\t   trigger: traceon, traceoff\n"
4697         "\t            enable_event:<system>:<event>\n"
4698         "\t            disable_event:<system>:<event>\n"
4699 #ifdef CONFIG_HIST_TRIGGERS
4700         "\t            enable_hist:<system>:<event>\n"
4701         "\t            disable_hist:<system>:<event>\n"
4702 #endif
4703 #ifdef CONFIG_STACKTRACE
4704         "\t\t    stacktrace\n"
4705 #endif
4706 #ifdef CONFIG_TRACER_SNAPSHOT
4707         "\t\t    snapshot\n"
4708 #endif
4709 #ifdef CONFIG_HIST_TRIGGERS
4710         "\t\t    hist (see below)\n"
4711 #endif
4712         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4713         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4714         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4715         "\t                  events/block/block_unplug/trigger\n"
4716         "\t   The first disables tracing every time block_unplug is hit.\n"
4717         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4718         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4719         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4720         "\t   Like function triggers, the counter is only decremented if the\n"
4721         "\t    trigger actually enabled or disabled tracing.\n"
4722         "\t   To remove a trigger without a count:\n"
4723         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4724         "\t   To remove a trigger with a count:\n"
4725         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4726         "\t   Filters can be ignored when removing a trigger.\n"
4727 #ifdef CONFIG_HIST_TRIGGERS
4728         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4729         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4730         "\t            [:values=<field1[,field2,...]>]\n"
4731         "\t            [:sort=<field1[,field2,...]>]\n"
4732         "\t            [:size=#entries]\n"
4733         "\t            [:pause][:continue][:clear]\n"
4734         "\t            [:name=histname1]\n"
4735         "\t            [if <filter>]\n\n"
4736         "\t    Note, special fields can be used as well:\n"
4737         "\t            common_timestamp - to record current timestamp\n"
4738         "\t            common_cpu - to record the CPU the event happened on\n"
4739         "\n"
4740         "\t    When a matching event is hit, an entry is added to a hash\n"
4741         "\t    table using the key(s) and value(s) named, and the value of a\n"
4742         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4743         "\t    correspond to fields in the event's format description.  Keys\n"
4744         "\t    can be any field, or the special string 'stacktrace'.\n"
4745         "\t    Compound keys consisting of up to two fields can be specified\n"
4746         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4747         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4748         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4749         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4750         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4751         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4752         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4753         "\t    its histogram data will be shared with other triggers of the\n"
4754         "\t    same name, and trigger hits will update this common data.\n\n"
4755         "\t    Reading the 'hist' file for the event will dump the hash\n"
4756         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4757         "\t    triggers attached to an event, there will be a table for each\n"
4758         "\t    trigger in the output.  The table displayed for a named\n"
4759         "\t    trigger will be the same as any other instance having the\n"
4760         "\t    same name.  The default format used to display a given field\n"
4761         "\t    can be modified by appending any of the following modifiers\n"
4762         "\t    to the field name, as applicable:\n\n"
4763         "\t            .hex        display a number as a hex value\n"
4764         "\t            .sym        display an address as a symbol\n"
4765         "\t            .sym-offset display an address as a symbol and offset\n"
4766         "\t            .execname   display a common_pid as a program name\n"
4767         "\t            .syscall    display a syscall id as a syscall name\n"
4768         "\t            .log2       display log2 value rather than raw number\n"
4769         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4770         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4771         "\t    trigger or to start a hist trigger but not log any events\n"
4772         "\t    until told to do so.  'continue' can be used to start or\n"
4773         "\t    restart a paused hist trigger.\n\n"
4774         "\t    The 'clear' parameter will clear the contents of a running\n"
4775         "\t    hist trigger and leave its current paused/active state\n"
4776         "\t    unchanged.\n\n"
4777         "\t    The enable_hist and disable_hist triggers can be used to\n"
4778         "\t    have one event conditionally start and stop another event's\n"
4779         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4780         "\t    the enable_event and disable_event triggers.\n"
4781 #endif
4782 ;
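/*
 * Illustrative hist trigger usage (a sketch only; the event and field names
 * below are examples and depend on the kernel configuration, and paths
 * assume tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *   # echo '!hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 */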
4783
4784 static ssize_t
4785 tracing_readme_read(struct file *filp, char __user *ubuf,
4786                        size_t cnt, loff_t *ppos)
4787 {
4788         return simple_read_from_buffer(ubuf, cnt, ppos,
4789                                         readme_msg, strlen(readme_msg));
4790 }
4791
4792 static const struct file_operations tracing_readme_fops = {
4793         .open           = tracing_open_generic,
4794         .read           = tracing_readme_read,
4795         .llseek         = generic_file_llseek,
4796 };
4797
4798 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4799 {
4800         int pid = ++(*pos);
4801
4802         return trace_find_tgid_ptr(pid);
4803 }
4804
4805 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4806 {
4807         int pid = *pos;
4808
4809         return trace_find_tgid_ptr(pid);
4810 }
4811
4812 static void saved_tgids_stop(struct seq_file *m, void *v)
4813 {
4814 }
4815
4816 static int saved_tgids_show(struct seq_file *m, void *v)
4817 {
4818         int *entry = (int *)v;
4819         int pid = entry - tgid_map;
4820         int tgid = *entry;
4821
4822         if (tgid == 0)
4823                 return SEQ_SKIP;
4824
4825         seq_printf(m, "%d %d\n", pid, tgid);
4826         return 0;
4827 }
4828
4829 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4830         .start          = saved_tgids_start,
4831         .stop           = saved_tgids_stop,
4832         .next           = saved_tgids_next,
4833         .show           = saved_tgids_show,
4834 };
4835
4836 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4837 {
4838         if (tracing_disabled)
4839                 return -ENODEV;
4840
4841         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4842 }
4843
4844
4845 static const struct file_operations tracing_saved_tgids_fops = {
4846         .open           = tracing_saved_tgids_open,
4847         .read           = seq_read,
4848         .llseek         = seq_lseek,
4849         .release        = seq_release,
4850 };
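/*
 * Example (illustrative): reading the "saved_tgids" file dumps the cached
 * pid-to-tgid mappings, one "<pid> <tgid>" pair per line, as printed by
 * saved_tgids_show():
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1234 1234
 *   1235 1234
 */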
4851
4852 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4853 {
4854         unsigned int *ptr = v;
4855
4856         if (*pos || m->count)
4857                 ptr++;
4858
4859         (*pos)++;
4860
4861         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4862              ptr++) {
4863                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4864                         continue;
4865
4866                 return ptr;
4867         }
4868
4869         return NULL;
4870 }
4871
4872 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4873 {
4874         void *v;
4875         loff_t l = 0;
4876
4877         preempt_disable();
4878         arch_spin_lock(&trace_cmdline_lock);
4879
4880         v = &savedcmd->map_cmdline_to_pid[0];
4881         while (l <= *pos) {
4882                 v = saved_cmdlines_next(m, v, &l);
4883                 if (!v)
4884                         return NULL;
4885         }
4886
4887         return v;
4888 }
4889
4890 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4891 {
4892         arch_spin_unlock(&trace_cmdline_lock);
4893         preempt_enable();
4894 }
4895
4896 static int saved_cmdlines_show(struct seq_file *m, void *v)
4897 {
4898         char buf[TASK_COMM_LEN];
4899         unsigned int *pid = v;
4900
4901         __trace_find_cmdline(*pid, buf);
4902         seq_printf(m, "%d %s\n", *pid, buf);
4903         return 0;
4904 }
4905
4906 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4907         .start          = saved_cmdlines_start,
4908         .next           = saved_cmdlines_next,
4909         .stop           = saved_cmdlines_stop,
4910         .show           = saved_cmdlines_show,
4911 };
4912
4913 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4914 {
4915         if (tracing_disabled)
4916                 return -ENODEV;
4917
4918         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4919 }
4920
4921 static const struct file_operations tracing_saved_cmdlines_fops = {
4922         .open           = tracing_saved_cmdlines_open,
4923         .read           = seq_read,
4924         .llseek         = seq_lseek,
4925         .release        = seq_release,
4926 };
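/*
 * Example (illustrative): the "saved_cmdlines" file lists the cached
 * pid-to-comm mappings, one "<pid> <comm>" pair per line, as printed by
 * saved_cmdlines_show():
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1 systemd
 *   42 kworker/0:1
 */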
4927
4928 static ssize_t
4929 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4930                                  size_t cnt, loff_t *ppos)
4931 {
4932         char buf[64];
4933         int r;
4934
4935         arch_spin_lock(&trace_cmdline_lock);
4936         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4937         arch_spin_unlock(&trace_cmdline_lock);
4938
4939         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4940 }
4941
4942 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4943 {
4944         kfree(s->saved_cmdlines);
4945         kfree(s->map_cmdline_to_pid);
4946         kfree(s);
4947 }
4948
4949 static int tracing_resize_saved_cmdlines(unsigned int val)
4950 {
4951         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4952
4953         s = kmalloc(sizeof(*s), GFP_KERNEL);
4954         if (!s)
4955                 return -ENOMEM;
4956
4957         if (allocate_cmdlines_buffer(val, s) < 0) {
4958                 kfree(s);
4959                 return -ENOMEM;
4960         }
4961
4962         arch_spin_lock(&trace_cmdline_lock);
4963         savedcmd_temp = savedcmd;
4964         savedcmd = s;
4965         arch_spin_unlock(&trace_cmdline_lock);
4966         free_saved_cmdlines_buffer(savedcmd_temp);
4967
4968         return 0;
4969 }
4970
4971 static ssize_t
4972 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4973                                   size_t cnt, loff_t *ppos)
4974 {
4975         unsigned long val;
4976         int ret;
4977
4978         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4979         if (ret)
4980                 return ret;
4981
4982         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4983         if (!val || val > PID_MAX_DEFAULT)
4984                 return -EINVAL;
4985
4986         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4987         if (ret < 0)
4988                 return ret;
4989
4990         *ppos += cnt;
4991
4992         return cnt;
4993 }
4994
4995 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4996         .open           = tracing_open_generic,
4997         .read           = tracing_saved_cmdlines_size_read,
4998         .write          = tracing_saved_cmdlines_size_write,
4999 };
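/*
 * Example (illustrative): the number of cached comms can be read and resized
 * through "saved_cmdlines_size"; the value is an entry count, capped at
 * PID_MAX_DEFAULT by the write handler above:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */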
5000
5001 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5002 static union trace_eval_map_item *
5003 update_eval_map(union trace_eval_map_item *ptr)
5004 {
5005         if (!ptr->map.eval_string) {
5006                 if (ptr->tail.next) {
5007                         ptr = ptr->tail.next;
5008                         /* Set ptr to the next real item (skip head) */
5009                         ptr++;
5010                 } else
5011                         return NULL;
5012         }
5013         return ptr;
5014 }
5015
5016 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5017 {
5018         union trace_eval_map_item *ptr = v;
5019
5020         /*
5021          * Paranoid! If ptr points to end, we don't want to increment past it.
5022          * This really should never happen.
5023          */
5024         ptr = update_eval_map(ptr);
5025         if (WARN_ON_ONCE(!ptr))
5026                 return NULL;
5027
5028         ptr++;
5029
5030         (*pos)++;
5031
5032         ptr = update_eval_map(ptr);
5033
5034         return ptr;
5035 }
5036
5037 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5038 {
5039         union trace_eval_map_item *v;
5040         loff_t l = 0;
5041
5042         mutex_lock(&trace_eval_mutex);
5043
5044         v = trace_eval_maps;
5045         if (v)
5046                 v++;
5047
5048         while (v && l < *pos) {
5049                 v = eval_map_next(m, v, &l);
5050         }
5051
5052         return v;
5053 }
5054
5055 static void eval_map_stop(struct seq_file *m, void *v)
5056 {
5057         mutex_unlock(&trace_eval_mutex);
5058 }
5059
5060 static int eval_map_show(struct seq_file *m, void *v)
5061 {
5062         union trace_eval_map_item *ptr = v;
5063
5064         seq_printf(m, "%s %ld (%s)\n",
5065                    ptr->map.eval_string, ptr->map.eval_value,
5066                    ptr->map.system);
5067
5068         return 0;
5069 }
5070
5071 static const struct seq_operations tracing_eval_map_seq_ops = {
5072         .start          = eval_map_start,
5073         .next           = eval_map_next,
5074         .stop           = eval_map_stop,
5075         .show           = eval_map_show,
5076 };
5077
5078 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5079 {
5080         if (tracing_disabled)
5081                 return -ENODEV;
5082
5083         return seq_open(filp, &tracing_eval_map_seq_ops);
5084 }
5085
5086 static const struct file_operations tracing_eval_map_fops = {
5087         .open           = tracing_eval_map_open,
5088         .read           = seq_read,
5089         .llseek         = seq_lseek,
5090         .release        = seq_release,
5091 };
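/*
 * Example (illustrative): each line of the "eval_map" file shows an
 * enum/sizeof string, the value it resolved to, and the trace system it
 * belongs to, in the "%s %ld (%s)" format used by eval_map_show():
 *
 *   # cat /sys/kernel/tracing/eval_map
 *   HI_SOFTIRQ 0 (irq)
 *   TIMER_SOFTIRQ 1 (irq)
 */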
5092
5093 static inline union trace_eval_map_item *
5094 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5095 {
5096         /* Return tail of array given the head */
5097         return ptr + ptr->head.length + 1;
5098 }
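/*
 * Layout of one chunk of the trace_eval_maps list built below (derived from
 * the code in trace_insert_eval_map_file(); indices are per chunk):
 *
 *   map_array[0]         head  (mod, length = len)
 *   map_array[1..len]    map   (one trace_eval_map per entry)
 *   map_array[len + 1]   tail  (zeroed; tail.next links the next chunk)
 *
 * trace_eval_jmp_to_tail(head) therefore returns head + length + 1.
 */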
5099
5100 static void
5101 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5102                            int len)
5103 {
5104         struct trace_eval_map **stop;
5105         struct trace_eval_map **map;
5106         union trace_eval_map_item *map_array;
5107         union trace_eval_map_item *ptr;
5108
5109         stop = start + len;
5110
5111         /*
5112          * The trace_eval_maps contains the map plus a head and tail item,
5113          * where the head holds the module and the length of the array, and
5114          * the tail holds a pointer to the next array in the list.
5115          */
5116         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5117         if (!map_array) {
5118                 pr_warn("Unable to allocate trace eval mapping\n");
5119                 return;
5120         }
5121
5122         mutex_lock(&trace_eval_mutex);
5123
5124         if (!trace_eval_maps)
5125                 trace_eval_maps = map_array;
5126         else {
5127                 ptr = trace_eval_maps;
5128                 for (;;) {
5129                         ptr = trace_eval_jmp_to_tail(ptr);
5130                         if (!ptr->tail.next)
5131                                 break;
5132                         ptr = ptr->tail.next;
5133
5134                 }
5135                 ptr->tail.next = map_array;
5136         }
5137         map_array->head.mod = mod;
5138         map_array->head.length = len;
5139         map_array++;
5140
5141         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5142                 map_array->map = **map;
5143                 map_array++;
5144         }
5145         memset(map_array, 0, sizeof(*map_array));
5146
5147         mutex_unlock(&trace_eval_mutex);
5148 }
5149
5150 static void trace_create_eval_file(struct dentry *d_tracer)
5151 {
5152         trace_create_file("eval_map", 0444, d_tracer,
5153                           NULL, &tracing_eval_map_fops);
5154 }
5155
5156 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5157 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5158 static inline void trace_insert_eval_map_file(struct module *mod,
5159                               struct trace_eval_map **start, int len) { }
5160 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5161
5162 static void trace_insert_eval_map(struct module *mod,
5163                                   struct trace_eval_map **start, int len)
5164 {
5165         struct trace_eval_map **map;
5166
5167         if (len <= 0)
5168                 return;
5169
5170         map = start;
5171
5172         trace_event_eval_update(map, len);
5173
5174         trace_insert_eval_map_file(mod, start, len);
5175 }
5176
5177 static ssize_t
5178 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5179                        size_t cnt, loff_t *ppos)
5180 {
5181         struct trace_array *tr = filp->private_data;
5182         char buf[MAX_TRACER_SIZE+2];
5183         int r;
5184
5185         mutex_lock(&trace_types_lock);
5186         r = sprintf(buf, "%s\n", tr->current_trace->name);
5187         mutex_unlock(&trace_types_lock);
5188
5189         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5190 }
5191
5192 int tracer_init(struct tracer *t, struct trace_array *tr)
5193 {
5194         tracing_reset_online_cpus(&tr->trace_buffer);
5195         return t->init(tr);
5196 }
5197
5198 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5199 {
5200         int cpu;
5201
5202         for_each_tracing_cpu(cpu)
5203                 per_cpu_ptr(buf->data, cpu)->entries = val;
5204 }
5205
5206 #ifdef CONFIG_TRACER_MAX_TRACE
5207 /* resize @trace_buf's per-cpu entries to the size of @size_buf's entries */
5208 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5209                                         struct trace_buffer *size_buf, int cpu_id)
5210 {
5211         int cpu, ret = 0;
5212
5213         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5214                 for_each_tracing_cpu(cpu) {
5215                         ret = ring_buffer_resize(trace_buf->buffer,
5216                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5217                         if (ret < 0)
5218                                 break;
5219                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5220                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5221                 }
5222         } else {
5223                 ret = ring_buffer_resize(trace_buf->buffer,
5224                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5225                 if (ret == 0)
5226                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5227                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5228         }
5229
5230         return ret;
5231 }
5232 #endif /* CONFIG_TRACER_MAX_TRACE */
5233
5234 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5235                                         unsigned long size, int cpu)
5236 {
5237         int ret;
5238
5239         /*
5240          * If kernel or user changes the size of the ring buffer
5241          * we use the size that was given, and we can forget about
5242          * expanding it later.
5243          */
5244         ring_buffer_expanded = true;
5245
5246         /* May be called before buffers are initialized */
5247         if (!tr->trace_buffer.buffer)
5248                 return 0;
5249
5250         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5251         if (ret < 0)
5252                 return ret;
5253
5254 #ifdef CONFIG_TRACER_MAX_TRACE
5255         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5256             !tr->current_trace->use_max_tr)
5257                 goto out;
5258
5259         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5260         if (ret < 0) {
5261                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5262                                                      &tr->trace_buffer, cpu);
5263                 if (r < 0) {
5264                         /*
5265                          * AARGH! We are left with a different
5266                          * size max buffer!!!!
5267                          * The max buffer is our "snapshot" buffer.
5268                          * When a tracer needs a snapshot (one of the
5269                          * latency tracers), it swaps the max buffer
5270                          * with the saved snapshot. We succeeded in
5271                          * updating the size of the main buffer, but failed
5272                          * to update the size of the max buffer. And when we
5273                          * tried to reset the main buffer to its original
5274                          * size, we failed there too. This is very unlikely
5275                          * to happen, but if it does, warn and kill all
5276                          * tracing.
5277                          */
5278                         WARN_ON(1);
5279                         tracing_disabled = 1;
5280                 }
5281                 return ret;
5282         }
5283
5284         if (cpu == RING_BUFFER_ALL_CPUS)
5285                 set_buffer_entries(&tr->max_buffer, size);
5286         else
5287                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5288
5289  out:
5290 #endif /* CONFIG_TRACER_MAX_TRACE */
5291
5292         if (cpu == RING_BUFFER_ALL_CPUS)
5293                 set_buffer_entries(&tr->trace_buffer, size);
5294         else
5295                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5296
5297         return ret;
5298 }
5299
5300 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5301                                           unsigned long size, int cpu_id)
5302 {
5303         int ret = size;
5304
5305         mutex_lock(&trace_types_lock);
5306
5307         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5308                 /* make sure this cpu is enabled in the mask */
5309                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5310                         ret = -EINVAL;
5311                         goto out;
5312                 }
5313         }
5314
5315         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5316         if (ret < 0)
5317                 ret = -ENOMEM;
5318
5319 out:
5320         mutex_unlock(&trace_types_lock);
5321
5322         return ret;
5323 }
5324
5325
5326 /**
5327  * tracing_update_buffers - used by tracing facility to expand ring buffers
5328  *
5329  * To save memory when tracing is configured in but never used, the
5330  * ring buffers are initially set to a minimum size. Once a user starts
5331  * to use the tracing facility, they need to grow to their default
5332  * size.
5333  *
5334  * This function is to be called when a tracer is about to be used.
5335  */
5336 int tracing_update_buffers(void)
5337 {
5338         int ret = 0;
5339
5340         mutex_lock(&trace_types_lock);
5341         if (!ring_buffer_expanded)
5342                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5343                                                 RING_BUFFER_ALL_CPUS);
5344         mutex_unlock(&trace_types_lock);
5345
5346         return ret;
5347 }
5348
5349 struct trace_option_dentry;
5350
5351 static void
5352 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5353
5354 /*
5355  * Used to clear out the tracer before deletion of an instance.
5356  * Must have trace_types_lock held.
5357  */
5358 static void tracing_set_nop(struct trace_array *tr)
5359 {
5360         if (tr->current_trace == &nop_trace)
5361                 return;
5362
5363         tr->current_trace->enabled--;
5364
5365         if (tr->current_trace->reset)
5366                 tr->current_trace->reset(tr);
5367
5368         tr->current_trace = &nop_trace;
5369 }
5370
5371 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5372 {
5373         /* Only enable if the directory has been created already. */
5374         if (!tr->dir)
5375                 return;
5376
5377         create_trace_option_files(tr, t);
5378 }
5379
5380 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5381 {
5382         struct tracer *t;
5383 #ifdef CONFIG_TRACER_MAX_TRACE
5384         bool had_max_tr;
5385 #endif
5386         int ret = 0;
5387
5388         mutex_lock(&trace_types_lock);
5389
5390         if (!ring_buffer_expanded) {
5391                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5392                                                 RING_BUFFER_ALL_CPUS);
5393                 if (ret < 0)
5394                         goto out;
5395                 ret = 0;
5396         }
5397
5398         for (t = trace_types; t; t = t->next) {
5399                 if (strcmp(t->name, buf) == 0)
5400                         break;
5401         }
5402         if (!t) {
5403                 ret = -EINVAL;
5404                 goto out;
5405         }
5406         if (t == tr->current_trace)
5407                 goto out;
5408
5409         /* Some tracers won't work on kernel command line */
5410         if (system_state < SYSTEM_RUNNING && t->noboot) {
5411                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5412                         t->name);
5413                 goto out;
5414         }
5415
5416         /* Some tracers are only allowed for the top level buffer */
5417         if (!trace_ok_for_array(t, tr)) {
5418                 ret = -EINVAL;
5419                 goto out;
5420         }
5421
5422         /* If trace pipe files are being read, we can't change the tracer */
5423         if (tr->current_trace->ref) {
5424                 ret = -EBUSY;
5425                 goto out;
5426         }
5427
5428         trace_branch_disable();
5429
5430         tr->current_trace->enabled--;
5431
5432         if (tr->current_trace->reset)
5433                 tr->current_trace->reset(tr);
5434
5435         /* Current trace needs to be nop_trace before synchronize_sched */
5436         tr->current_trace = &nop_trace;
5437
5438 #ifdef CONFIG_TRACER_MAX_TRACE
5439         had_max_tr = tr->allocated_snapshot;
5440
5441         if (had_max_tr && !t->use_max_tr) {
5442                 /*
5443                  * We need to make sure that the update_max_tr sees that
5444                  * current_trace changed to nop_trace to keep it from
5445                  * swapping the buffers after we resize it.
5446                  * The update_max_tr is called with interrupts disabled,
5447                  * so a synchronize_sched() is sufficient.
5448                  */
5449                 synchronize_sched();
5450                 free_snapshot(tr);
5451         }
5452 #endif
5453
5454 #ifdef CONFIG_TRACER_MAX_TRACE
5455         if (t->use_max_tr && !had_max_tr) {
5456                 ret = tracing_alloc_snapshot_instance(tr);
5457                 if (ret < 0)
5458                         goto out;
5459         }
5460 #endif
5461
5462         if (t->init) {
5463                 ret = tracer_init(t, tr);
5464                 if (ret)
5465                         goto out;
5466         }
5467
5468         tr->current_trace = t;
5469         tr->current_trace->enabled++;
5470         trace_branch_enable(tr);
5471  out:
5472         mutex_unlock(&trace_types_lock);
5473
5474         return ret;
5475 }
5476
5477 static ssize_t
5478 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5479                         size_t cnt, loff_t *ppos)
5480 {
5481         struct trace_array *tr = filp->private_data;
5482         char buf[MAX_TRACER_SIZE+1];
5483         int i;
5484         size_t ret;
5485         int err;
5486
5487         ret = cnt;
5488
5489         if (cnt > MAX_TRACER_SIZE)
5490                 cnt = MAX_TRACER_SIZE;
5491
5492         if (copy_from_user(buf, ubuf, cnt))
5493                 return -EFAULT;
5494
5495         buf[cnt] = 0;
5496
5497         /* strip trailing whitespace. */
5498         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5499                 buf[i] = 0;
5500
5501         err = tracing_set_tracer(tr, buf);
5502         if (err)
5503                 return err;
5504
5505         *ppos += ret;
5506
5507         return ret;
5508 }
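/*
 * Example (illustrative; assumes tracefs is mounted at /sys/kernel/tracing
 * and the requested tracer is built in): the handlers above back the
 * "current_tracer" file, so switching tracers from user space looks like:
 *
 *   # cat /sys/kernel/tracing/current_tracer
 *   nop
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 */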
5509
5510 static ssize_t
5511 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5512                    size_t cnt, loff_t *ppos)
5513 {
5514         char buf[64];
5515         int r;
5516
5517         r = snprintf(buf, sizeof(buf), "%ld\n",
5518                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5519         if (r > sizeof(buf))
5520                 r = sizeof(buf);
5521         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5522 }
5523
5524 static ssize_t
5525 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5526                     size_t cnt, loff_t *ppos)
5527 {
5528         unsigned long val;
5529         int ret;
5530
5531         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5532         if (ret)
5533                 return ret;
5534
5535         *ptr = val * 1000;
5536
5537         return cnt;
5538 }
5539
5540 static ssize_t
5541 tracing_thresh_read(struct file *filp, char __user *ubuf,
5542                     size_t cnt, loff_t *ppos)
5543 {
5544         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5545 }
5546
5547 static ssize_t
5548 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5549                      size_t cnt, loff_t *ppos)
5550 {
5551         struct trace_array *tr = filp->private_data;
5552         int ret;
5553
5554         mutex_lock(&trace_types_lock);
5555         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5556         if (ret < 0)
5557                 goto out;
5558
5559         if (tr->current_trace->update_thresh) {
5560                 ret = tr->current_trace->update_thresh(tr);
5561                 if (ret < 0)
5562                         goto out;
5563         }
5564
5565         ret = cnt;
5566 out:
5567         mutex_unlock(&trace_types_lock);
5568
5569         return ret;
5570 }
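/*
 * Example (illustrative): tracing_thresh is written in microseconds and
 * stored in nanoseconds (tracing_nsecs_write() above multiplies by 1000):
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh   (threshold = 100 usecs)
 *   # echo 0 > /sys/kernel/tracing/tracing_thresh     (clears the threshold)
 */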
5571
5572 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5573
5574 static ssize_t
5575 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5576                      size_t cnt, loff_t *ppos)
5577 {
5578         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5579 }
5580
5581 static ssize_t
5582 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5583                       size_t cnt, loff_t *ppos)
5584 {
5585         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5586 }
5587
5588 #endif
5589
5590 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5591 {
5592         struct trace_array *tr = inode->i_private;
5593         struct trace_iterator *iter;
5594         int ret = 0;
5595
5596         if (tracing_disabled)
5597                 return -ENODEV;
5598
5599         if (trace_array_get(tr) < 0)
5600                 return -ENODEV;
5601
5602         mutex_lock(&trace_types_lock);
5603
5604         /* create a buffer to store the information to pass to userspace */
5605         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5606         if (!iter) {
5607                 ret = -ENOMEM;
5608                 __trace_array_put(tr);
5609                 goto out;
5610         }
5611
5612         trace_seq_init(&iter->seq);
5613         iter->trace = tr->current_trace;
5614
5615         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5616                 ret = -ENOMEM;
5617                 goto fail;
5618         }
5619
5620         /* trace pipe does not show start of buffer */
5621         cpumask_setall(iter->started);
5622
5623         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5624                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5625
5626         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5627         if (trace_clocks[tr->clock_id].in_ns)
5628                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5629
5630         iter->tr = tr;
5631         iter->trace_buffer = &tr->trace_buffer;
5632         iter->cpu_file = tracing_get_cpu(inode);
5633         mutex_init(&iter->mutex);
5634         filp->private_data = iter;
5635
5636         if (iter->trace->pipe_open)
5637                 iter->trace->pipe_open(iter);
5638
5639         nonseekable_open(inode, filp);
5640
5641         tr->current_trace->ref++;
5642 out:
5643         mutex_unlock(&trace_types_lock);
5644         return ret;
5645
5646 fail:
5647         kfree(iter);
5648         __trace_array_put(tr);
5649         mutex_unlock(&trace_types_lock);
5650         return ret;
5651 }
5652
5653 static int tracing_release_pipe(struct inode *inode, struct file *file)
5654 {
5655         struct trace_iterator *iter = file->private_data;
5656         struct trace_array *tr = inode->i_private;
5657
5658         mutex_lock(&trace_types_lock);
5659
5660         tr->current_trace->ref--;
5661
5662         if (iter->trace->pipe_close)
5663                 iter->trace->pipe_close(iter);
5664
5665         mutex_unlock(&trace_types_lock);
5666
5667         free_cpumask_var(iter->started);
5668         mutex_destroy(&iter->mutex);
5669         kfree(iter);
5670
5671         trace_array_put(tr);
5672
5673         return 0;
5674 }
5675
5676 static __poll_t
5677 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5678 {
5679         struct trace_array *tr = iter->tr;
5680
5681         /* Iterators are static, they should be filled or empty */
5682         if (trace_buffer_iter(iter, iter->cpu_file))
5683                 return EPOLLIN | EPOLLRDNORM;
5684
5685         if (tr->trace_flags & TRACE_ITER_BLOCK)
5686                 /*
5687                  * Always select as readable when in blocking mode
5688                  */
5689                 return EPOLLIN | EPOLLRDNORM;
5690         else
5691                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5692                                              filp, poll_table);
5693 }
5694
5695 static __poll_t
5696 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5697 {
5698         struct trace_iterator *iter = filp->private_data;
5699
5700         return trace_poll(iter, filp, poll_table);
5701 }
5702
5703 /* Must be called with iter->mutex held. */
5704 static int tracing_wait_pipe(struct file *filp)
5705 {
5706         struct trace_iterator *iter = filp->private_data;
5707         int ret;
5708
5709         while (trace_empty(iter)) {
5710
5711                 if ((filp->f_flags & O_NONBLOCK)) {
5712                         return -EAGAIN;
5713                 }
5714
5715                 /*
5716                  * We block until we have read something and tracing is
5717                  * then disabled. We still block if tracing is disabled but
5718                  * we have never read anything: this allows a user to cat
5719                  * this file and then enable tracing. But once we have read
5720                  * something, we give an EOF when tracing is disabled again.
5721                  *
5722                  * iter->pos will be 0 if we haven't read anything.
5723                  */
5724                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5725                         break;
5726
5727                 mutex_unlock(&iter->mutex);
5728
5729                 ret = wait_on_pipe(iter, false);
5730
5731                 mutex_lock(&iter->mutex);
5732
5733                 if (ret)
5734                         return ret;
5735         }
5736
5737         return 1;
5738 }
5739
5740 /*
5741  * Consumer reader.
5742  */
5743 static ssize_t
5744 tracing_read_pipe(struct file *filp, char __user *ubuf,
5745                   size_t cnt, loff_t *ppos)
5746 {
5747         struct trace_iterator *iter = filp->private_data;
5748         ssize_t sret;
5749
5750         /*
5751          * Avoid more than one consumer on a single file descriptor.
5752          * This is just a matter of trace coherency: the ring buffer itself
5753          * is protected.
5754          */
5755         mutex_lock(&iter->mutex);
5756
5757         /* return any leftover data */
5758         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5759         if (sret != -EBUSY)
5760                 goto out;
5761
5762         trace_seq_init(&iter->seq);
5763
5764         if (iter->trace->read) {
5765                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5766                 if (sret)
5767                         goto out;
5768         }
5769
5770 waitagain:
5771         sret = tracing_wait_pipe(filp);
5772         if (sret <= 0)
5773                 goto out;
5774
5775         /* stop when tracing is finished */
5776         if (trace_empty(iter)) {
5777                 sret = 0;
5778                 goto out;
5779         }
5780
5781         if (cnt >= PAGE_SIZE)
5782                 cnt = PAGE_SIZE - 1;
5783
5784         /* reset all but tr, trace, and overruns */
5785         memset(&iter->seq, 0,
5786                sizeof(struct trace_iterator) -
5787                offsetof(struct trace_iterator, seq));
5788         cpumask_clear(iter->started);
5789         trace_seq_init(&iter->seq);
5790         iter->pos = -1;
5791
5792         trace_event_read_lock();
5793         trace_access_lock(iter->cpu_file);
5794         while (trace_find_next_entry_inc(iter) != NULL) {
5795                 enum print_line_t ret;
5796                 int save_len = iter->seq.seq.len;
5797
5798                 ret = print_trace_line(iter);
5799                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5800                         /* don't print partial lines */
5801                         iter->seq.seq.len = save_len;
5802                         break;
5803                 }
5804                 if (ret != TRACE_TYPE_NO_CONSUME)
5805                         trace_consume(iter);
5806
5807                 if (trace_seq_used(&iter->seq) >= cnt)
5808                         break;
5809
5810                 /*
5811                  * Setting the full flag means we reached the trace_seq buffer
5812                  * size and should have left via the partial output condition above.
5813                  * One of the trace_seq_* functions is not used properly.
5814                  */
5815                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5816                           iter->ent->type);
5817         }
5818         trace_access_unlock(iter->cpu_file);
5819         trace_event_read_unlock();
5820
5821         /* Now copy what we have to the user */
5822         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5823         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5824                 trace_seq_init(&iter->seq);
5825
5826         /*
5827          * If there was nothing to send to user, in spite of consuming trace
5828          * entries, go back to wait for more entries.
5829          */
5830         if (sret == -EBUSY)
5831                 goto waitagain;
5832
5833 out:
5834         mutex_unlock(&iter->mutex);
5835
5836         return sret;
5837 }
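/*
 * Example (illustrative): "trace_pipe" is the consuming counterpart of the
 * "trace" file; reads block until data is available (see tracing_wait_pipe()
 * above) and remove what they return from the ring buffer:
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *   (blocks, then streams events as they are recorded)
 */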
5838
5839 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5840                                      unsigned int idx)
5841 {
5842         __free_page(spd->pages[idx]);
5843 }
5844
5845 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5846         .can_merge              = 0,
5847         .confirm                = generic_pipe_buf_confirm,
5848         .release                = generic_pipe_buf_release,
5849         .steal                  = generic_pipe_buf_steal,
5850         .get                    = generic_pipe_buf_get,
5851 };
5852
5853 static size_t
5854 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5855 {
5856         size_t count;
5857         int save_len;
5858         int ret;
5859
5860         /* Seq buffer is page-sized, exactly what we need. */
5861         for (;;) {
5862                 save_len = iter->seq.seq.len;
5863                 ret = print_trace_line(iter);
5864
5865                 if (trace_seq_has_overflowed(&iter->seq)) {
5866                         iter->seq.seq.len = save_len;
5867                         break;
5868                 }
5869
5870                 /*
5871                  * This should not be hit, because it should only
5872                  * be set if the iter->seq overflowed. But check it
5873                  * anyway to be safe.
5874                  */
5875                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5876                         iter->seq.seq.len = save_len;
5877                         break;
5878                 }
5879
5880                 count = trace_seq_used(&iter->seq) - save_len;
5881                 if (rem < count) {
5882                         rem = 0;
5883                         iter->seq.seq.len = save_len;
5884                         break;
5885                 }
5886
5887                 if (ret != TRACE_TYPE_NO_CONSUME)
5888                         trace_consume(iter);
5889                 rem -= count;
5890                 if (!trace_find_next_entry_inc(iter))   {
5891                         rem = 0;
5892                         iter->ent = NULL;
5893                         break;
5894                 }
5895         }
5896
5897         return rem;
5898 }
5899
5900 static ssize_t tracing_splice_read_pipe(struct file *filp,
5901                                         loff_t *ppos,
5902                                         struct pipe_inode_info *pipe,
5903                                         size_t len,
5904                                         unsigned int flags)
5905 {
5906         struct page *pages_def[PIPE_DEF_BUFFERS];
5907         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5908         struct trace_iterator *iter = filp->private_data;
5909         struct splice_pipe_desc spd = {
5910                 .pages          = pages_def,
5911                 .partial        = partial_def,
5912                 .nr_pages       = 0, /* This gets updated below. */
5913                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5914                 .ops            = &tracing_pipe_buf_ops,
5915                 .spd_release    = tracing_spd_release_pipe,
5916         };
5917         ssize_t ret;
5918         size_t rem;
5919         unsigned int i;
5920
5921         if (splice_grow_spd(pipe, &spd))
5922                 return -ENOMEM;
5923
5924         mutex_lock(&iter->mutex);
5925
5926         if (iter->trace->splice_read) {
5927                 ret = iter->trace->splice_read(iter, filp,
5928                                                ppos, pipe, len, flags);
5929                 if (ret)
5930                         goto out_err;
5931         }
5932
5933         ret = tracing_wait_pipe(filp);
5934         if (ret <= 0)
5935                 goto out_err;
5936
5937         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5938                 ret = -EFAULT;
5939                 goto out_err;
5940         }
5941
5942         trace_event_read_lock();
5943         trace_access_lock(iter->cpu_file);
5944
5945         /* Fill as many pages as possible. */
5946         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5947                 spd.pages[i] = alloc_page(GFP_KERNEL);
5948                 if (!spd.pages[i])
5949                         break;
5950
5951                 rem = tracing_fill_pipe_page(rem, iter);
5952
5953                 /* Copy the data into the page, so we can start over. */
5954                 ret = trace_seq_to_buffer(&iter->seq,
5955                                           page_address(spd.pages[i]),
5956                                           trace_seq_used(&iter->seq));
5957                 if (ret < 0) {
5958                         __free_page(spd.pages[i]);
5959                         break;
5960                 }
5961                 spd.partial[i].offset = 0;
5962                 spd.partial[i].len = trace_seq_used(&iter->seq);
5963
5964                 trace_seq_init(&iter->seq);
5965         }
5966
5967         trace_access_unlock(iter->cpu_file);
5968         trace_event_read_unlock();
5969         mutex_unlock(&iter->mutex);
5970
5971         spd.nr_pages = i;
5972
5973         if (i)
5974                 ret = splice_to_pipe(pipe, &spd);
5975         else
5976                 ret = 0;
5977 out:
5978         splice_shrink_spd(&spd);
5979         return ret;
5980
5981 out_err:
5982         mutex_unlock(&iter->mutex);
5983         goto out;
5984 }
5985
5986 static ssize_t
5987 tracing_entries_read(struct file *filp, char __user *ubuf,
5988                      size_t cnt, loff_t *ppos)
5989 {
5990         struct inode *inode = file_inode(filp);
5991         struct trace_array *tr = inode->i_private;
5992         int cpu = tracing_get_cpu(inode);
5993         char buf[64];
5994         int r = 0;
5995         ssize_t ret;
5996
5997         mutex_lock(&trace_types_lock);
5998
5999         if (cpu == RING_BUFFER_ALL_CPUS) {
6000                 int cpu, buf_size_same;
6001                 unsigned long size;
6002
6003                 size = 0;
6004                 buf_size_same = 1;
6005                 /* check if all cpu sizes are the same */
6006                 for_each_tracing_cpu(cpu) {
6007                         /* fill in the size from first enabled cpu */
6008                         if (size == 0)
6009                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6010                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6011                                 buf_size_same = 0;
6012                                 break;
6013                         }
6014                 }
6015
6016                 if (buf_size_same) {
6017                         if (!ring_buffer_expanded)
6018                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6019                                             size >> 10,
6020                                             trace_buf_size >> 10);
6021                         else
6022                                 r = sprintf(buf, "%lu\n", size >> 10);
6023                 } else
6024                         r = sprintf(buf, "X\n");
6025         } else
6026                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6027
6028         mutex_unlock(&trace_types_lock);
6029
6030         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6031         return ret;
6032 }
6033
6034 static ssize_t
6035 tracing_entries_write(struct file *filp, const char __user *ubuf,
6036                       size_t cnt, loff_t *ppos)
6037 {
6038         struct inode *inode = file_inode(filp);
6039         struct trace_array *tr = inode->i_private;
6040         unsigned long val;
6041         int ret;
6042
6043         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6044         if (ret)
6045                 return ret;
6046
6047         /* must have at least 1 entry */
6048         if (!val)
6049                 return -EINVAL;
6050
6051         /* value is in KB */
6052         val <<= 10;
6053         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6054         if (ret < 0)
6055                 return ret;
6056
6057         *ppos += cnt;
6058
6059         return cnt;
6060 }
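/*
 * Example (illustrative): the handlers above back "buffer_size_kb"; the
 * value written is in KiB per CPU (note the "val <<= 10" above), and the
 * per-CPU instances live under per_cpu/cpu<N>/:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb          (all CPUs)
 *   # echo 8192 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */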
6061
6062 static ssize_t
6063 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6064                                 size_t cnt, loff_t *ppos)
6065 {
6066         struct trace_array *tr = filp->private_data;
6067         char buf[64];
6068         int r, cpu;
6069         unsigned long size = 0, expanded_size = 0;
6070
6071         mutex_lock(&trace_types_lock);
6072         for_each_tracing_cpu(cpu) {
6073                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6074                 if (!ring_buffer_expanded)
6075                         expanded_size += trace_buf_size >> 10;
6076         }
6077         if (ring_buffer_expanded)
6078                 r = sprintf(buf, "%lu\n", size);
6079         else
6080                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6081         mutex_unlock(&trace_types_lock);
6082
6083         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6084 }
6085
6086 static ssize_t
6087 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6088                           size_t cnt, loff_t *ppos)
6089 {
6090         /*
6091          * There is no need to read what the user has written; this function
6092          * exists only so that "echo" to this file does not return an error.
6093          */
6094
6095         *ppos += cnt;
6096
6097         return cnt;
6098 }
6099
6100 static int
6101 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6102 {
6103         struct trace_array *tr = inode->i_private;
6104
6105         /* disable tracing if the stop-on-free option is set */
6106         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6107                 tracer_tracing_off(tr);
6108         /* resize the ring buffer to 0 */
6109         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6110
6111         trace_array_put(tr);
6112
6113         return 0;
6114 }
6115
6116 static ssize_t
6117 tracing_mark_write(struct file *filp, const char __user *ubuf,
6118                                         size_t cnt, loff_t *fpos)
6119 {
6120         struct trace_array *tr = filp->private_data;
6121         struct ring_buffer_event *event;
6122         enum event_trigger_type tt = ETT_NONE;
6123         struct ring_buffer *buffer;
6124         struct print_entry *entry;
6125         unsigned long irq_flags;
6126         const char faulted[] = "<faulted>";
6127         ssize_t written;
6128         int size;
6129         int len;
6130
6131 /* Used in tracing_mark_raw_write() as well */
6132 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6133
6134         if (tracing_disabled)
6135                 return -EINVAL;
6136
6137         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6138                 return -EINVAL;
6139
6140         if (cnt > TRACE_BUF_SIZE)
6141                 cnt = TRACE_BUF_SIZE;
6142
6143         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6144
6145         local_save_flags(irq_flags);
6146         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6147
6148         /* If less than "<faulted>", then make sure we can still add that */
6149         if (cnt < FAULTED_SIZE)
6150                 size += FAULTED_SIZE - cnt;
6151
6152         buffer = tr->trace_buffer.buffer;
6153         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6154                                             irq_flags, preempt_count());
6155         if (unlikely(!event))
6156                 /* Ring buffer disabled, return as if not open for write */
6157                 return -EBADF;
6158
6159         entry = ring_buffer_event_data(event);
6160         entry->ip = _THIS_IP_;
6161
6162         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6163         if (len) {
6164                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6165                 cnt = FAULTED_SIZE;
6166                 written = -EFAULT;
6167         } else
6168                 written = cnt;
6169         len = cnt;
6170
6171         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6172                 /* do not add \n before testing triggers, but add \0 */
6173                 entry->buf[cnt] = '\0';
6174                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6175         }
6176
6177         if (entry->buf[cnt - 1] != '\n') {
6178                 entry->buf[cnt] = '\n';
6179                 entry->buf[cnt + 1] = '\0';
6180         } else
6181                 entry->buf[cnt] = '\0';
6182
6183         __buffer_unlock_commit(buffer, event);
6184
6185         if (tt)
6186                 event_triggers_post_call(tr->trace_marker_file, tt);
6187
6188         if (written > 0)
6189                 *fpos += written;
6190
6191         return written;
6192 }
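/*
 * Example (illustrative): user space can annotate the trace by writing
 * plain text to "trace_marker"; the string shows up in the trace output
 * as a print event:
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *   # grep tracing_mark_write /sys/kernel/tracing/trace
 */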
6193
6194 /* Limit it for now to 3K (including tag) */
6195 #define RAW_DATA_MAX_SIZE (1024*3)
6196
6197 static ssize_t
6198 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6199                                         size_t cnt, loff_t *fpos)
6200 {
6201         struct trace_array *tr = filp->private_data;
6202         struct ring_buffer_event *event;
6203         struct ring_buffer *buffer;
6204         struct raw_data_entry *entry;
6205         const char faulted[] = "<faulted>";
6206         unsigned long irq_flags;
6207         ssize_t written;
6208         int size;
6209         int len;
6210
6211 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6212
6213         if (tracing_disabled)
6214                 return -EINVAL;
6215
6216         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6217                 return -EINVAL;
6218
6219         /* The marker must at least have a tag id */
6220         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6221                 return -EINVAL;
6222
6223         if (cnt > TRACE_BUF_SIZE)
6224                 cnt = TRACE_BUF_SIZE;
6225
6226         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6227
6228         local_save_flags(irq_flags);
6229         size = sizeof(*entry) + cnt;
6230         if (cnt < FAULT_SIZE_ID)
6231                 size += FAULT_SIZE_ID - cnt;
6232
6233         buffer = tr->trace_buffer.buffer;
6234         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6235                                             irq_flags, preempt_count());
6236         if (!event)
6237                 /* Ring buffer disabled, return as if not open for write */
6238                 return -EBADF;
6239
6240         entry = ring_buffer_event_data(event);
6241
6242         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6243         if (len) {
6244                 entry->id = -1;
6245                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6246                 written = -EFAULT;
6247         } else
6248                 written = cnt;
6249
6250         __buffer_unlock_commit(buffer, event);
6251
6252         if (written > 0)
6253                 *fpos += written;
6254
6255         return written;
6256 }
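/*
 * Example (illustrative): writes to "trace_marker_raw" must begin with a
 * 4-byte tag id followed by the raw payload (see the sizeof(unsigned int)
 * check above). A minimal user-space sketch:
 *
 *   struct { unsigned int id; char data[8]; } rec = { 0x1234, "payload" };
 *   write(fd, &rec, sizeof(rec));   // fd is an open trace_marker_raw fd
 */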
6257
6258 static int tracing_clock_show(struct seq_file *m, void *v)
6259 {
6260         struct trace_array *tr = m->private;
6261         int i;
6262
6263         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6264                 seq_printf(m,
6265                         "%s%s%s%s", i ? " " : "",
6266                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6267                         i == tr->clock_id ? "]" : "");
6268         seq_putc(m, '\n');
6269
6270         return 0;
6271 }
6272
6273 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6274 {
6275         int i;
6276
6277         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6278                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6279                         break;
6280         }
6281         if (i == ARRAY_SIZE(trace_clocks))
6282                 return -EINVAL;
6283
6284         mutex_lock(&trace_types_lock);
6285
6286         tr->clock_id = i;
6287
6288         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6289
6290         /*
6291          * New clock may not be consistent with the previous clock.
6292          * Reset the buffer so that it doesn't have incomparable timestamps.
6293          */
6294         tracing_reset_online_cpus(&tr->trace_buffer);
6295
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297         if (tr->max_buffer.buffer)
6298                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6299         tracing_reset_online_cpus(&tr->max_buffer);
6300 #endif
6301
6302         mutex_unlock(&trace_types_lock);
6303
6304         return 0;
6305 }
6306
6307 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6308                                    size_t cnt, loff_t *fpos)
6309 {
6310         struct seq_file *m = filp->private_data;
6311         struct trace_array *tr = m->private;
6312         char buf[64];
6313         const char *clockstr;
6314         int ret;
6315
6316         if (cnt >= sizeof(buf))
6317                 return -EINVAL;
6318
6319         if (copy_from_user(buf, ubuf, cnt))
6320                 return -EFAULT;
6321
6322         buf[cnt] = 0;
6323
6324         clockstr = strstrip(buf);
6325
6326         ret = tracing_set_clock(tr, clockstr);
6327         if (ret)
6328                 return ret;
6329
6330         *fpos += cnt;
6331
6332         return cnt;
6333 }
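/*
 * Example (illustrative; the exact clock list is kernel dependent): the
 * "trace_clock" file lists the available clocks with the current one in
 * brackets (see tracing_clock_show()), and writing a name switches clocks
 * and resets the buffer:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo mono > /sys/kernel/tracing/trace_clock
 */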
6334
6335 static int tracing_clock_open(struct inode *inode, struct file *file)
6336 {
6337         struct trace_array *tr = inode->i_private;
6338         int ret;
6339
6340         if (tracing_disabled)
6341                 return -ENODEV;
6342
6343         if (trace_array_get(tr))
6344                 return -ENODEV;
6345
6346         ret = single_open(file, tracing_clock_show, inode->i_private);
6347         if (ret < 0)
6348                 trace_array_put(tr);
6349
6350         return ret;
6351 }
6352
6353 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6354 {
6355         struct trace_array *tr = m->private;
6356
6357         mutex_lock(&trace_types_lock);
6358
6359         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6360                 seq_puts(m, "delta [absolute]\n");
6361         else
6362                 seq_puts(m, "[delta] absolute\n");
6363
6364         mutex_unlock(&trace_types_lock);
6365
6366         return 0;
6367 }
6368
6369 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6370 {
6371         struct trace_array *tr = inode->i_private;
6372         int ret;
6373
6374         if (tracing_disabled)
6375                 return -ENODEV;
6376
6377         if (trace_array_get(tr))
6378                 return -ENODEV;
6379
6380         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6381         if (ret < 0)
6382                 trace_array_put(tr);
6383
6384         return ret;
6385 }
6386
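/*
 * tracing_set_time_stamp_abs - set the timestamp mode of a trace array
 * @tr: the trace_array to update
 * @abs: true for absolute timestamps, false to return to delta timestamps
 *
 * Absolute mode is reference counted via time_stamp_abs_ref, so the ring
 * buffers only switch back to deltas when the last user drops its
 * reference.  Returns -EINVAL when asked to drop a reference that was
 * never taken.
 */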
6387 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6388 {
6389         int ret = 0;
6390
6391         mutex_lock(&trace_types_lock);
6392
6393         if (abs && tr->time_stamp_abs_ref++)
6394                 goto out;
6395
6396         if (!abs) {
6397                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6398                         ret = -EINVAL;
6399                         goto out;
6400                 }
6401
6402                 if (--tr->time_stamp_abs_ref)
6403                         goto out;
6404         }
6405
6406         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6407
6408 #ifdef CONFIG_TRACER_MAX_TRACE
6409         if (tr->max_buffer.buffer)
6410                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6411 #endif
6412  out:
6413         mutex_unlock(&trace_types_lock);
6414
6415         return ret;
6416 }
6417
6418 struct ftrace_buffer_info {
6419         struct trace_iterator   iter;
6420         void                    *spare;
6421         unsigned int            spare_cpu;
6422         unsigned int            read;
6423 };
6424
6425 #ifdef CONFIG_TRACER_SNAPSHOT
6426 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6427 {
6428         struct trace_array *tr = inode->i_private;
6429         struct trace_iterator *iter;
6430         struct seq_file *m;
6431         int ret = 0;
6432
6433         if (trace_array_get(tr) < 0)
6434                 return -ENODEV;
6435
6436         if (file->f_mode & FMODE_READ) {
6437                 iter = __tracing_open(inode, file, true);
6438                 if (IS_ERR(iter))
6439                         ret = PTR_ERR(iter);
6440         } else {
6441                 /* Writes still need the seq_file to hold the private data */
6442                 ret = -ENOMEM;
6443                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6444                 if (!m)
6445                         goto out;
6446                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6447                 if (!iter) {
6448                         kfree(m);
6449                         goto out;
6450                 }
6451                 ret = 0;
6452
6453                 iter->tr = tr;
6454                 iter->trace_buffer = &tr->max_buffer;
6455                 iter->cpu_file = tracing_get_cpu(inode);
6456                 m->private = iter;
6457                 file->private_data = m;
6458         }
6459 out:
6460         if (ret < 0)
6461                 trace_array_put(tr);
6462
6463         return ret;
6464 }
6465
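/*
 * Write handler for the "snapshot" file.  The accepted values mirror the
 * switch statement below:
 *
 *   echo 0 > snapshot    free the snapshot buffer (all-CPU file only)
 *   echo 1 > snapshot    allocate it if necessary and swap it with the
 *                        live buffer
 *   echo 2 > snapshot    (or any other value) clear the snapshot contents
 */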
6466 static ssize_t
6467 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6468                        loff_t *ppos)
6469 {
6470         struct seq_file *m = filp->private_data;
6471         struct trace_iterator *iter = m->private;
6472         struct trace_array *tr = iter->tr;
6473         unsigned long val;
6474         int ret;
6475
6476         ret = tracing_update_buffers();
6477         if (ret < 0)
6478                 return ret;
6479
6480         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6481         if (ret)
6482                 return ret;
6483
6484         mutex_lock(&trace_types_lock);
6485
6486         if (tr->current_trace->use_max_tr) {
6487                 ret = -EBUSY;
6488                 goto out;
6489         }
6490
6491         switch (val) {
6492         case 0:
6493                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6494                         ret = -EINVAL;
6495                         break;
6496                 }
6497                 if (tr->allocated_snapshot)
6498                         free_snapshot(tr);
6499                 break;
6500         case 1:
6501 /* Only allow per-cpu swap if the ring buffer supports it */
6502 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6503                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6504                         ret = -EINVAL;
6505                         break;
6506                 }
6507 #endif
6508                 if (tr->allocated_snapshot)
6509                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6510                                         &tr->trace_buffer, iter->cpu_file);
6511                 else
6512                         ret = tracing_alloc_snapshot_instance(tr);
6513                 if (ret < 0)
6514                         break;
6515                 local_irq_disable();
6516                 /* Now, we're going to swap */
6517                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6518                         update_max_tr(tr, current, smp_processor_id());
6519                 else
6520                         update_max_tr_single(tr, current, iter->cpu_file);
6521                 local_irq_enable();
6522                 break;
6523         default:
6524                 if (tr->allocated_snapshot) {
6525                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6526                                 tracing_reset_online_cpus(&tr->max_buffer);
6527                         else
6528                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6529                 }
6530                 break;
6531         }
6532
6533         if (ret >= 0) {
6534                 *ppos += cnt;
6535                 ret = cnt;
6536         }
6537 out:
6538         mutex_unlock(&trace_types_lock);
6539         return ret;
6540 }
6541
6542 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6543 {
6544         struct seq_file *m = file->private_data;
6545         int ret;
6546
6547         ret = tracing_release(inode, file);
6548
6549         if (file->f_mode & FMODE_READ)
6550                 return ret;
6551
6552         /* If write only, the seq_file is just a stub */
6553         if (m)
6554                 kfree(m->private);
6555         kfree(m);
6556
6557         return 0;
6558 }
6559
6560 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6561 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6562                                     size_t count, loff_t *ppos);
6563 static int tracing_buffers_release(struct inode *inode, struct file *file);
6564 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6565                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6566
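/*
 * snapshot_raw_open() backs the per_cpu/cpuN/snapshot_raw file.  It reuses
 * the trace_pipe_raw machinery declared above, but points the iterator at
 * the max/snapshot buffer so its pages can be read or spliced out raw.
 */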
6567 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6568 {
6569         struct ftrace_buffer_info *info;
6570         int ret;
6571
6572         ret = tracing_buffers_open(inode, filp);
6573         if (ret < 0)
6574                 return ret;
6575
6576         info = filp->private_data;
6577
6578         if (info->iter.trace->use_max_tr) {
6579                 tracing_buffers_release(inode, filp);
6580                 return -EBUSY;
6581         }
6582
6583         info->iter.snapshot = true;
6584         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6585
6586         return ret;
6587 }
6588
6589 #endif /* CONFIG_TRACER_SNAPSHOT */
6590
6591
6592 static const struct file_operations tracing_thresh_fops = {
6593         .open           = tracing_open_generic,
6594         .read           = tracing_thresh_read,
6595         .write          = tracing_thresh_write,
6596         .llseek         = generic_file_llseek,
6597 };
6598
6599 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6600 static const struct file_operations tracing_max_lat_fops = {
6601         .open           = tracing_open_generic,
6602         .read           = tracing_max_lat_read,
6603         .write          = tracing_max_lat_write,
6604         .llseek         = generic_file_llseek,
6605 };
6606 #endif
6607
6608 static const struct file_operations set_tracer_fops = {
6609         .open           = tracing_open_generic,
6610         .read           = tracing_set_trace_read,
6611         .write          = tracing_set_trace_write,
6612         .llseek         = generic_file_llseek,
6613 };
6614
6615 static const struct file_operations tracing_pipe_fops = {
6616         .open           = tracing_open_pipe,
6617         .poll           = tracing_poll_pipe,
6618         .read           = tracing_read_pipe,
6619         .splice_read    = tracing_splice_read_pipe,
6620         .release        = tracing_release_pipe,
6621         .llseek         = no_llseek,
6622 };
6623
6624 static const struct file_operations tracing_entries_fops = {
6625         .open           = tracing_open_generic_tr,
6626         .read           = tracing_entries_read,
6627         .write          = tracing_entries_write,
6628         .llseek         = generic_file_llseek,
6629         .release        = tracing_release_generic_tr,
6630 };
6631
6632 static const struct file_operations tracing_total_entries_fops = {
6633         .open           = tracing_open_generic_tr,
6634         .read           = tracing_total_entries_read,
6635         .llseek         = generic_file_llseek,
6636         .release        = tracing_release_generic_tr,
6637 };
6638
6639 static const struct file_operations tracing_free_buffer_fops = {
6640         .open           = tracing_open_generic_tr,
6641         .write          = tracing_free_buffer_write,
6642         .release        = tracing_free_buffer_release,
6643 };
6644
6645 static const struct file_operations tracing_mark_fops = {
6646         .open           = tracing_open_generic_tr,
6647         .write          = tracing_mark_write,
6648         .llseek         = generic_file_llseek,
6649         .release        = tracing_release_generic_tr,
6650 };
6651
6652 static const struct file_operations tracing_mark_raw_fops = {
6653         .open           = tracing_open_generic_tr,
6654         .write          = tracing_mark_raw_write,
6655         .llseek         = generic_file_llseek,
6656         .release        = tracing_release_generic_tr,
6657 };
6658
6659 static const struct file_operations trace_clock_fops = {
6660         .open           = tracing_clock_open,
6661         .read           = seq_read,
6662         .llseek         = seq_lseek,
6663         .release        = tracing_single_release_tr,
6664         .write          = tracing_clock_write,
6665 };
6666
6667 static const struct file_operations trace_time_stamp_mode_fops = {
6668         .open           = tracing_time_stamp_mode_open,
6669         .read           = seq_read,
6670         .llseek         = seq_lseek,
6671         .release        = tracing_single_release_tr,
6672 };
6673
6674 #ifdef CONFIG_TRACER_SNAPSHOT
6675 static const struct file_operations snapshot_fops = {
6676         .open           = tracing_snapshot_open,
6677         .read           = seq_read,
6678         .write          = tracing_snapshot_write,
6679         .llseek         = tracing_lseek,
6680         .release        = tracing_snapshot_release,
6681 };
6682
6683 static const struct file_operations snapshot_raw_fops = {
6684         .open           = snapshot_raw_open,
6685         .read           = tracing_buffers_read,
6686         .release        = tracing_buffers_release,
6687         .splice_read    = tracing_buffers_splice_read,
6688         .llseek         = no_llseek,
6689 };
6690
6691 #endif /* CONFIG_TRACER_SNAPSHOT */
6692
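/*
 * The tracing_buffers_* callbacks below implement the per-CPU
 * trace_pipe_raw files (see tracing_init_tracefs_percpu()).  They export
 * whole ring-buffer pages to user space: read() copies data out through a
 * spare page, while splice_read() hands the pages themselves to a pipe.
 */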
6693 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6694 {
6695         struct trace_array *tr = inode->i_private;
6696         struct ftrace_buffer_info *info;
6697         int ret;
6698
6699         if (tracing_disabled)
6700                 return -ENODEV;
6701
6702         if (trace_array_get(tr) < 0)
6703                 return -ENODEV;
6704
6705         info = kzalloc(sizeof(*info), GFP_KERNEL);
6706         if (!info) {
6707                 trace_array_put(tr);
6708                 return -ENOMEM;
6709         }
6710
6711         mutex_lock(&trace_types_lock);
6712
6713         info->iter.tr           = tr;
6714         info->iter.cpu_file     = tracing_get_cpu(inode);
6715         info->iter.trace        = tr->current_trace;
6716         info->iter.trace_buffer = &tr->trace_buffer;
6717         info->spare             = NULL;
6718         /* Force reading ring buffer for first read */
6719         info->read              = (unsigned int)-1;
6720
6721         filp->private_data = info;
6722
6723         tr->current_trace->ref++;
6724
6725         mutex_unlock(&trace_types_lock);
6726
6727         ret = nonseekable_open(inode, filp);
6728         if (ret < 0)
6729                 trace_array_put(tr);
6730
6731         return ret;
6732 }
6733
6734 static __poll_t
6735 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6736 {
6737         struct ftrace_buffer_info *info = filp->private_data;
6738         struct trace_iterator *iter = &info->iter;
6739
6740         return trace_poll(iter, filp, poll_table);
6741 }
6742
6743 static ssize_t
6744 tracing_buffers_read(struct file *filp, char __user *ubuf,
6745                      size_t count, loff_t *ppos)
6746 {
6747         struct ftrace_buffer_info *info = filp->private_data;
6748         struct trace_iterator *iter = &info->iter;
6749         ssize_t ret = 0;
6750         ssize_t size;
6751
6752         if (!count)
6753                 return 0;
6754
6755 #ifdef CONFIG_TRACER_MAX_TRACE
6756         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6757                 return -EBUSY;
6758 #endif
6759
6760         if (!info->spare) {
6761                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6762                                                           iter->cpu_file);
6763                 if (IS_ERR(info->spare)) {
6764                         ret = PTR_ERR(info->spare);
6765                         info->spare = NULL;
6766                 } else {
6767                         info->spare_cpu = iter->cpu_file;
6768                 }
6769         }
6770         if (!info->spare)
6771                 return ret;
6772
6773         /* Do we have previous read data to read? */
6774         if (info->read < PAGE_SIZE)
6775                 goto read;
6776
6777  again:
6778         trace_access_lock(iter->cpu_file);
6779         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6780                                     &info->spare,
6781                                     count,
6782                                     iter->cpu_file, 0);
6783         trace_access_unlock(iter->cpu_file);
6784
6785         if (ret < 0) {
6786                 if (trace_empty(iter)) {
6787                         if ((filp->f_flags & O_NONBLOCK))
6788                                 return -EAGAIN;
6789
6790                         ret = wait_on_pipe(iter, false);
6791                         if (ret)
6792                                 return ret;
6793
6794                         goto again;
6795                 }
6796                 return 0;
6797         }
6798
6799         info->read = 0;
6800  read:
6801         size = PAGE_SIZE - info->read;
6802         if (size > count)
6803                 size = count;
6804
6805         ret = copy_to_user(ubuf, info->spare + info->read, size);
6806         if (ret == size)
6807                 return -EFAULT;
6808
6809         size -= ret;
6810
6811         *ppos += size;
6812         info->read += size;
6813
6814         return size;
6815 }
6816
6817 static int tracing_buffers_release(struct inode *inode, struct file *file)
6818 {
6819         struct ftrace_buffer_info *info = file->private_data;
6820         struct trace_iterator *iter = &info->iter;
6821
6822         mutex_lock(&trace_types_lock);
6823
6824         iter->tr->current_trace->ref--;
6825
6826         __trace_array_put(iter->tr);
6827
6828         if (info->spare)
6829                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6830                                            info->spare_cpu, info->spare);
6831         kfree(info);
6832
6833         mutex_unlock(&trace_types_lock);
6834
6835         return 0;
6836 }
6837
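/*
 * A buffer_ref pins one ring-buffer page that has been spliced into a
 * pipe.  The refcount is shared by every pipe_buffer that points at the
 * page; the page is only returned to the ring buffer when the last
 * reference is dropped in buffer_ref_release().
 */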
6838 struct buffer_ref {
6839         struct ring_buffer      *buffer;
6840         void                    *page;
6841         int                     cpu;
6842         refcount_t              refcount;
6843 };
6844
6845 static void buffer_ref_release(struct buffer_ref *ref)
6846 {
6847         if (!refcount_dec_and_test(&ref->refcount))
6848                 return;
6849         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6850         kfree(ref);
6851 }
6852
6853 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6854                                     struct pipe_buffer *buf)
6855 {
6856         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6857
6858         buffer_ref_release(ref);
6859         buf->private = 0;
6860 }
6861
6862 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6863                                 struct pipe_buffer *buf)
6864 {
6865         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6866
6867         if (refcount_read(&ref->refcount) > INT_MAX/2)
6868                 return false;
6869
6870         refcount_inc(&ref->refcount);
6871         return true;
6872 }
6873
6874 /* Pipe buffer operations for a buffer. */
6875 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6876         .can_merge              = 0,
6877         .confirm                = generic_pipe_buf_confirm,
6878         .release                = buffer_pipe_buf_release,
6879         .steal                  = generic_pipe_buf_nosteal,
6880         .get                    = buffer_pipe_buf_get,
6881 };
6882
6883 /*
6884  * Callback from splice_to_pipe(): releases the pages left in the spd
6885  * if we errored out while filling the pipe.
6886  */
6887 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6888 {
6889         struct buffer_ref *ref =
6890                 (struct buffer_ref *)spd->partial[i].private;
6891
6892         buffer_ref_release(ref);
6893         spd->partial[i].private = 0;
6894 }
6895
6896 static ssize_t
6897 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6898                             struct pipe_inode_info *pipe, size_t len,
6899                             unsigned int flags)
6900 {
6901         struct ftrace_buffer_info *info = file->private_data;
6902         struct trace_iterator *iter = &info->iter;
6903         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6904         struct page *pages_def[PIPE_DEF_BUFFERS];
6905         struct splice_pipe_desc spd = {
6906                 .pages          = pages_def,
6907                 .partial        = partial_def,
6908                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6909                 .ops            = &buffer_pipe_buf_ops,
6910                 .spd_release    = buffer_spd_release,
6911         };
6912         struct buffer_ref *ref;
6913         int entries, i;
6914         ssize_t ret = 0;
6915
6916 #ifdef CONFIG_TRACER_MAX_TRACE
6917         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6918                 return -EBUSY;
6919 #endif
6920
6921         if (*ppos & (PAGE_SIZE - 1))
6922                 return -EINVAL;
6923
6924         if (len & (PAGE_SIZE - 1)) {
6925                 if (len < PAGE_SIZE)
6926                         return -EINVAL;
6927                 len &= PAGE_MASK;
6928         }
6929
6930         if (splice_grow_spd(pipe, &spd))
6931                 return -ENOMEM;
6932
6933  again:
6934         trace_access_lock(iter->cpu_file);
6935         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6936
6937         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6938                 struct page *page;
6939                 int r;
6940
6941                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6942                 if (!ref) {
6943                         ret = -ENOMEM;
6944                         break;
6945                 }
6946
6947                 refcount_set(&ref->refcount, 1);
6948                 ref->buffer = iter->trace_buffer->buffer;
6949                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6950                 if (IS_ERR(ref->page)) {
6951                         ret = PTR_ERR(ref->page);
6952                         ref->page = NULL;
6953                         kfree(ref);
6954                         break;
6955                 }
6956                 ref->cpu = iter->cpu_file;
6957
6958                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6959                                           len, iter->cpu_file, 1);
6960                 if (r < 0) {
6961                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6962                                                    ref->page);
6963                         kfree(ref);
6964                         break;
6965                 }
6966
6967                 page = virt_to_page(ref->page);
6968
6969                 spd.pages[i] = page;
6970                 spd.partial[i].len = PAGE_SIZE;
6971                 spd.partial[i].offset = 0;
6972                 spd.partial[i].private = (unsigned long)ref;
6973                 spd.nr_pages++;
6974                 *ppos += PAGE_SIZE;
6975
6976                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6977         }
6978
6979         trace_access_unlock(iter->cpu_file);
6980         spd.nr_pages = i;
6981
6982         /* did we read anything? */
6983         if (!spd.nr_pages) {
6984                 if (ret)
6985                         goto out;
6986
6987                 ret = -EAGAIN;
6988                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6989                         goto out;
6990
6991                 ret = wait_on_pipe(iter, true);
6992                 if (ret)
6993                         goto out;
6994
6995                 goto again;
6996         }
6997
6998         ret = splice_to_pipe(pipe, &spd);
6999 out:
7000         splice_shrink_spd(&spd);
7001
7002         return ret;
7003 }
7004
7005 static const struct file_operations tracing_buffers_fops = {
7006         .open           = tracing_buffers_open,
7007         .read           = tracing_buffers_read,
7008         .poll           = tracing_buffers_poll,
7009         .release        = tracing_buffers_release,
7010         .splice_read    = tracing_buffers_splice_read,
7011         .llseek         = no_llseek,
7012 };
7013
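/*
 * Back end of the per_cpu/cpuN/stats file: prints the entry, overrun and
 * byte counts plus the oldest and current timestamps of one CPU's ring
 * buffer as plain text.
 */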
7014 static ssize_t
7015 tracing_stats_read(struct file *filp, char __user *ubuf,
7016                    size_t count, loff_t *ppos)
7017 {
7018         struct inode *inode = file_inode(filp);
7019         struct trace_array *tr = inode->i_private;
7020         struct trace_buffer *trace_buf = &tr->trace_buffer;
7021         int cpu = tracing_get_cpu(inode);
7022         struct trace_seq *s;
7023         unsigned long cnt;
7024         unsigned long long t;
7025         unsigned long usec_rem;
7026
7027         s = kmalloc(sizeof(*s), GFP_KERNEL);
7028         if (!s)
7029                 return -ENOMEM;
7030
7031         trace_seq_init(s);
7032
7033         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7034         trace_seq_printf(s, "entries: %ld\n", cnt);
7035
7036         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7037         trace_seq_printf(s, "overrun: %ld\n", cnt);
7038
7039         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7040         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7041
7042         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7043         trace_seq_printf(s, "bytes: %ld\n", cnt);
7044
7045         if (trace_clocks[tr->clock_id].in_ns) {
7046                 /* local or global for trace_clock */
7047                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7048                 usec_rem = do_div(t, USEC_PER_SEC);
7049                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7050                                                                 t, usec_rem);
7051
7052                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7053                 usec_rem = do_div(t, USEC_PER_SEC);
7054                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7055         } else {
7056                 /* counter or tsc mode for trace_clock */
7057                 trace_seq_printf(s, "oldest event ts: %llu\n",
7058                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7059
7060                 trace_seq_printf(s, "now ts: %llu\n",
7061                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7062         }
7063
7064         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7065         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7066
7067         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7068         trace_seq_printf(s, "read events: %ld\n", cnt);
7069
7070         count = simple_read_from_buffer(ubuf, count, ppos,
7071                                         s->buffer, trace_seq_used(s));
7072
7073         kfree(s);
7074
7075         return count;
7076 }
7077
7078 static const struct file_operations tracing_stats_fops = {
7079         .open           = tracing_open_generic_tr,
7080         .read           = tracing_stats_read,
7081         .llseek         = generic_file_llseek,
7082         .release        = tracing_release_generic_tr,
7083 };
7084
7085 #ifdef CONFIG_DYNAMIC_FTRACE
7086
7087 static ssize_t
7088 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7089                   size_t cnt, loff_t *ppos)
7090 {
7091         unsigned long *p = filp->private_data;
7092         char buf[64]; /* Not too big for a shallow stack */
7093         int r;
7094
7095         r = scnprintf(buf, 63, "%ld", *p);
7096         buf[r++] = '\n';
7097
7098         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7099 }
7100
7101 static const struct file_operations tracing_dyn_info_fops = {
7102         .open           = tracing_open_generic,
7103         .read           = tracing_read_dyn_info,
7104         .llseek         = generic_file_llseek,
7105 };
7106 #endif /* CONFIG_DYNAMIC_FTRACE */
7107
7108 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7109 static void
7110 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7111                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7112                 void *data)
7113 {
7114         tracing_snapshot_instance(tr);
7115 }
7116
7117 static void
7118 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7119                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7120                       void *data)
7121 {
7122         struct ftrace_func_mapper *mapper = data;
7123         long *count = NULL;
7124
7125         if (mapper)
7126                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7127
7128         if (count) {
7129
7130                 if (*count <= 0)
7131                         return;
7132
7133                 (*count)--;
7134         }
7135
7136         tracing_snapshot_instance(tr);
7137 }
7138
7139 static int
7140 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7141                       struct ftrace_probe_ops *ops, void *data)
7142 {
7143         struct ftrace_func_mapper *mapper = data;
7144         long *count = NULL;
7145
7146         seq_printf(m, "%ps:", (void *)ip);
7147
7148         seq_puts(m, "snapshot");
7149
7150         if (mapper)
7151                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7152
7153         if (count)
7154                 seq_printf(m, ":count=%ld\n", *count);
7155         else
7156                 seq_puts(m, ":unlimited\n");
7157
7158         return 0;
7159 }
7160
7161 static int
7162 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7163                      unsigned long ip, void *init_data, void **data)
7164 {
7165         struct ftrace_func_mapper *mapper = *data;
7166
7167         if (!mapper) {
7168                 mapper = allocate_ftrace_func_mapper();
7169                 if (!mapper)
7170                         return -ENOMEM;
7171                 *data = mapper;
7172         }
7173
7174         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7175 }
7176
7177 static void
7178 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7179                      unsigned long ip, void *data)
7180 {
7181         struct ftrace_func_mapper *mapper = data;
7182
7183         if (!ip) {
7184                 if (!mapper)
7185                         return;
7186                 free_ftrace_func_mapper(mapper, NULL);
7187                 return;
7188         }
7189
7190         ftrace_func_mapper_remove_ip(mapper, ip);
7191 }
7192
7193 static struct ftrace_probe_ops snapshot_probe_ops = {
7194         .func                   = ftrace_snapshot,
7195         .print                  = ftrace_snapshot_print,
7196 };
7197
7198 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7199         .func                   = ftrace_count_snapshot,
7200         .print                  = ftrace_snapshot_print,
7201         .init                   = ftrace_snapshot_init,
7202         .free                   = ftrace_snapshot_free,
7203 };
7204
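/*
 * Parse a "snapshot" command attached to a function filter.  Usage sketch
 * ("do_fault" stands in for any traceable function):
 *
 *   echo 'do_fault:snapshot' > set_ftrace_filter     # snapshot on every hit
 *   echo 'do_fault:snapshot:5' > set_ftrace_filter   # only the first 5 hits
 *   echo '!do_fault:snapshot' > set_ftrace_filter    # remove the probe
 */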
7205 static int
7206 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7207                                char *glob, char *cmd, char *param, int enable)
7208 {
7209         struct ftrace_probe_ops *ops;
7210         void *count = (void *)-1;
7211         char *number;
7212         int ret;
7213
7214         if (!tr)
7215                 return -ENODEV;
7216
7217         /* hash funcs only work with set_ftrace_filter */
7218         if (!enable)
7219                 return -EINVAL;
7220
7221         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7222
7223         if (glob[0] == '!')
7224                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7225
7226         if (!param)
7227                 goto out_reg;
7228
7229         number = strsep(&param, ":");
7230
7231         if (!strlen(number))
7232                 goto out_reg;
7233
7234         /*
7235          * We use the callback data field (which is a pointer)
7236          * as our counter.
7237          */
7238         ret = kstrtoul(number, 0, (unsigned long *)&count);
7239         if (ret)
7240                 return ret;
7241
7242  out_reg:
7243         ret = tracing_alloc_snapshot_instance(tr);
7244         if (ret < 0)
7245                 goto out;
7246
7247         ret = register_ftrace_function_probe(glob, tr, ops, count);
7248
7249  out:
7250         return ret < 0 ? ret : 0;
7251 }
7252
7253 static struct ftrace_func_command ftrace_snapshot_cmd = {
7254         .name                   = "snapshot",
7255         .func                   = ftrace_trace_snapshot_callback,
7256 };
7257
7258 static __init int register_snapshot_cmd(void)
7259 {
7260         return register_ftrace_command(&ftrace_snapshot_cmd);
7261 }
7262 #else
7263 static inline __init int register_snapshot_cmd(void) { return 0; }
7264 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7265
7266 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7267 {
7268         if (WARN_ON(!tr->dir))
7269                 return ERR_PTR(-ENODEV);
7270
7271         /* Top directory uses NULL as the parent */
7272         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7273                 return NULL;
7274
7275         /* All sub buffers have a descriptor */
7276         return tr->dir;
7277 }
7278
7279 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7280 {
7281         struct dentry *d_tracer;
7282
7283         if (tr->percpu_dir)
7284                 return tr->percpu_dir;
7285
7286         d_tracer = tracing_get_dentry(tr);
7287         if (IS_ERR(d_tracer))
7288                 return NULL;
7289
7290         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7291
7292         WARN_ONCE(!tr->percpu_dir,
7293                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7294
7295         return tr->percpu_dir;
7296 }
7297
7298 static struct dentry *
7299 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7300                       void *data, long cpu, const struct file_operations *fops)
7301 {
7302         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7303
7304         if (ret) /* See tracing_get_cpu() */
7305                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7306         return ret;
7307 }
7308
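/*
 * Create the per_cpu/cpuN directory for @cpu and populate it with the
 * per-CPU views of the trace files (trace, trace_pipe, stats, ...).
 */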
7309 static void
7310 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7311 {
7312         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7313         struct dentry *d_cpu;
7314         char cpu_dir[30]; /* 30 characters should be more than enough */
7315
7316         if (!d_percpu)
7317                 return;
7318
7319         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7320         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7321         if (!d_cpu) {
7322                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7323                 return;
7324         }
7325
7326         /* per cpu trace_pipe */
7327         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7328                                 tr, cpu, &tracing_pipe_fops);
7329
7330         /* per cpu trace */
7331         trace_create_cpu_file("trace", 0644, d_cpu,
7332                                 tr, cpu, &tracing_fops);
7333
7334         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7335                                 tr, cpu, &tracing_buffers_fops);
7336
7337         trace_create_cpu_file("stats", 0444, d_cpu,
7338                                 tr, cpu, &tracing_stats_fops);
7339
7340         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7341                                 tr, cpu, &tracing_entries_fops);
7342
7343 #ifdef CONFIG_TRACER_SNAPSHOT
7344         trace_create_cpu_file("snapshot", 0644, d_cpu,
7345                                 tr, cpu, &snapshot_fops);
7346
7347         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7348                                 tr, cpu, &snapshot_raw_fops);
7349 #endif
7350 }
7351
7352 #ifdef CONFIG_FTRACE_SELFTEST
7353 /* Let selftest have access to static functions in this file */
7354 #include "trace_selftest.c"
7355 #endif
7356
7357 static ssize_t
7358 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7359                         loff_t *ppos)
7360 {
7361         struct trace_option_dentry *topt = filp->private_data;
7362         char *buf;
7363
7364         if (topt->flags->val & topt->opt->bit)
7365                 buf = "1\n";
7366         else
7367                 buf = "0\n";
7368
7369         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7370 }
7371
7372 static ssize_t
7373 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7374                          loff_t *ppos)
7375 {
7376         struct trace_option_dentry *topt = filp->private_data;
7377         unsigned long val;
7378         int ret;
7379
7380         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7381         if (ret)
7382                 return ret;
7383
7384         if (val != 0 && val != 1)
7385                 return -EINVAL;
7386
7387         if (!!(topt->flags->val & topt->opt->bit) != val) {
7388                 mutex_lock(&trace_types_lock);
7389                 ret = __set_tracer_option(topt->tr, topt->flags,
7390                                           topt->opt, !val);
7391                 mutex_unlock(&trace_types_lock);
7392                 if (ret)
7393                         return ret;
7394         }
7395
7396         *ppos += cnt;
7397
7398         return cnt;
7399 }
7400
7401
7402 static const struct file_operations trace_options_fops = {
7403         .open = tracing_open_generic,
7404         .read = trace_options_read,
7405         .write = trace_options_write,
7406         .llseek = generic_file_llseek,
7407 };
7408
7409 /*
7410  * In order to pass in both the trace_array descriptor as well as the index
7411  * of the flag that the trace option file represents, the trace_array
7412  * has a character array trace_flags_index[], which holds the index of the
7413  * bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
7414  * The address of the element for that flag is passed to the option file
7415  * read/write callbacks as their private data.
7416  *
7417  * To extract both the index and the trace_array descriptor from that
7418  * pointer, get_tr_index() does the following:
7419  *
7420  *   idx = *ptr;
7421  *
7422  * The value stored at the pointer is the index itself (remember that
7423  * index[1] == 1, and so on).
7424  *
7425  * Subtracting that index from the pointer then lands on the start of
7426  * the array:
7427  *
7428  *   ptr - idx == &index[0]
7429  *
7430  * A simple container_of() on that address finally gets us back to the
7431  * trace_array descriptor that embeds trace_flags_index[].
7432  */
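/*
 * Worked example (illustration only): if data == &tr->trace_flags_index[3],
 * then *pindex == 3 and data - 3 == &tr->trace_flags_index[0], from which
 * container_of() recovers the enclosing trace_array.
 */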
7433 static void get_tr_index(void *data, struct trace_array **ptr,
7434                          unsigned int *pindex)
7435 {
7436         *pindex = *(unsigned char *)data;
7437
7438         *ptr = container_of(data - *pindex, struct trace_array,
7439                             trace_flags_index);
7440 }
7441
7442 static ssize_t
7443 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7444                         loff_t *ppos)
7445 {
7446         void *tr_index = filp->private_data;
7447         struct trace_array *tr;
7448         unsigned int index;
7449         char *buf;
7450
7451         get_tr_index(tr_index, &tr, &index);
7452
7453         if (tr->trace_flags & (1 << index))
7454                 buf = "1\n";
7455         else
7456                 buf = "0\n";
7457
7458         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7459 }
7460
7461 static ssize_t
7462 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7463                          loff_t *ppos)
7464 {
7465         void *tr_index = filp->private_data;
7466         struct trace_array *tr;
7467         unsigned int index;
7468         unsigned long val;
7469         int ret;
7470
7471         get_tr_index(tr_index, &tr, &index);
7472
7473         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7474         if (ret)
7475                 return ret;
7476
7477         if (val != 0 && val != 1)
7478                 return -EINVAL;
7479
7480         mutex_lock(&event_mutex);
7481         mutex_lock(&trace_types_lock);
7482         ret = set_tracer_flag(tr, 1 << index, val);
7483         mutex_unlock(&trace_types_lock);
7484         mutex_unlock(&event_mutex);
7485
7486         if (ret < 0)
7487                 return ret;
7488
7489         *ppos += cnt;
7490
7491         return cnt;
7492 }
7493
7494 static const struct file_operations trace_options_core_fops = {
7495         .open = tracing_open_generic,
7496         .read = trace_options_core_read,
7497         .write = trace_options_core_write,
7498         .llseek = generic_file_llseek,
7499 };
7500
7501 struct dentry *trace_create_file(const char *name,
7502                                  umode_t mode,
7503                                  struct dentry *parent,
7504                                  void *data,
7505                                  const struct file_operations *fops)
7506 {
7507         struct dentry *ret;
7508
7509         ret = tracefs_create_file(name, mode, parent, data, fops);
7510         if (!ret)
7511                 pr_warn("Could not create tracefs '%s' entry\n", name);
7512
7513         return ret;
7514 }
7515
7516
7517 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7518 {
7519         struct dentry *d_tracer;
7520
7521         if (tr->options)
7522                 return tr->options;
7523
7524         d_tracer = tracing_get_dentry(tr);
7525         if (IS_ERR(d_tracer))
7526                 return NULL;
7527
7528         tr->options = tracefs_create_dir("options", d_tracer);
7529         if (!tr->options) {
7530                 pr_warn("Could not create tracefs directory 'options'\n");
7531                 return NULL;
7532         }
7533
7534         return tr->options;
7535 }
7536
7537 static void
7538 create_trace_option_file(struct trace_array *tr,
7539                          struct trace_option_dentry *topt,
7540                          struct tracer_flags *flags,
7541                          struct tracer_opt *opt)
7542 {
7543         struct dentry *t_options;
7544
7545         t_options = trace_options_init_dentry(tr);
7546         if (!t_options)
7547                 return;
7548
7549         topt->flags = flags;
7550         topt->opt = opt;
7551         topt->tr = tr;
7552
7553         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7554                                     &trace_options_fops);
7555
7556 }
7557
7558 static void
7559 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7560 {
7561         struct trace_option_dentry *topts;
7562         struct trace_options *tr_topts;
7563         struct tracer_flags *flags;
7564         struct tracer_opt *opts;
7565         int cnt;
7566         int i;
7567
7568         if (!tracer)
7569                 return;
7570
7571         flags = tracer->flags;
7572
7573         if (!flags || !flags->opts)
7574                 return;
7575
7576         /*
7577          * If this is an instance, only create flags for tracers
7578          * the instance may have.
7579          */
7580         if (!trace_ok_for_array(tracer, tr))
7581                 return;
7582
7583         for (i = 0; i < tr->nr_topts; i++) {
7584                 /* Make sure there are no duplicate flags. */
7585                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7586                         return;
7587         }
7588
7589         opts = flags->opts;
7590
7591         for (cnt = 0; opts[cnt].name; cnt++)
7592                 ;
7593
7594         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7595         if (!topts)
7596                 return;
7597
7598         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7599                             GFP_KERNEL);
7600         if (!tr_topts) {
7601                 kfree(topts);
7602                 return;
7603         }
7604
7605         tr->topts = tr_topts;
7606         tr->topts[tr->nr_topts].tracer = tracer;
7607         tr->topts[tr->nr_topts].topts = topts;
7608         tr->nr_topts++;
7609
7610         for (cnt = 0; opts[cnt].name; cnt++) {
7611                 create_trace_option_file(tr, &topts[cnt], flags,
7612                                          &opts[cnt]);
7613                 WARN_ONCE(topts[cnt].entry == NULL,
7614                           "Failed to create trace option: %s",
7615                           opts[cnt].name);
7616         }
7617 }
7618
7619 static struct dentry *
7620 create_trace_option_core_file(struct trace_array *tr,
7621                               const char *option, long index)
7622 {
7623         struct dentry *t_options;
7624
7625         t_options = trace_options_init_dentry(tr);
7626         if (!t_options)
7627                 return NULL;
7628
7629         return trace_create_file(option, 0644, t_options,
7630                                  (void *)&tr->trace_flags_index[index],
7631                                  &trace_options_core_fops);
7632 }
7633
7634 static void create_trace_options_dir(struct trace_array *tr)
7635 {
7636         struct dentry *t_options;
7637         bool top_level = tr == &global_trace;
7638         int i;
7639
7640         t_options = trace_options_init_dentry(tr);
7641         if (!t_options)
7642                 return;
7643
7644         for (i = 0; trace_options[i]; i++) {
7645                 if (top_level ||
7646                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7647                         create_trace_option_core_file(tr, trace_options[i], i);
7648         }
7649 }
7650
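/*
 * rb_simple_read()/rb_simple_write() implement the "tracing_on" file:
 * reading reports whether the ring buffer is currently recording, and
 * writing a zero or non-zero value stops or restarts recording, invoking
 * the current tracer's stop()/start() hooks when it provides them.
 */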
7651 static ssize_t
7652 rb_simple_read(struct file *filp, char __user *ubuf,
7653                size_t cnt, loff_t *ppos)
7654 {
7655         struct trace_array *tr = filp->private_data;
7656         char buf[64];
7657         int r;
7658
7659         r = tracer_tracing_is_on(tr);
7660         r = sprintf(buf, "%d\n", r);
7661
7662         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7663 }
7664
7665 static ssize_t
7666 rb_simple_write(struct file *filp, const char __user *ubuf,
7667                 size_t cnt, loff_t *ppos)
7668 {
7669         struct trace_array *tr = filp->private_data;
7670         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7671         unsigned long val;
7672         int ret;
7673
7674         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7675         if (ret)
7676                 return ret;
7677
7678         if (buffer) {
7679                 mutex_lock(&trace_types_lock);
7680                 if (!!val == tracer_tracing_is_on(tr)) {
7681                         val = 0; /* do nothing */
7682                 } else if (val) {
7683                         tracer_tracing_on(tr);
7684                         if (tr->current_trace->start)
7685                                 tr->current_trace->start(tr);
7686                 } else {
7687                         tracer_tracing_off(tr);
7688                         if (tr->current_trace->stop)
7689                                 tr->current_trace->stop(tr);
7690                 }
7691                 mutex_unlock(&trace_types_lock);
7692         }
7693
7694         (*ppos)++;
7695
7696         return cnt;
7697 }
7698
7699 static const struct file_operations rb_simple_fops = {
7700         .open           = tracing_open_generic_tr,
7701         .read           = rb_simple_read,
7702         .write          = rb_simple_write,
7703         .release        = tracing_release_generic_tr,
7704         .llseek         = default_llseek,
7705 };
7706
7707 struct dentry *trace_instance_dir;
7708
7709 static void
7710 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7711
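/*
 * Allocate one trace_buffer: the ring buffer itself plus the per-CPU
 * trace_array_cpu bookkeeping.  Used for the main buffer and, with
 * CONFIG_TRACER_MAX_TRACE, for the max/snapshot buffer as well.
 */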
7712 static int
7713 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7714 {
7715         enum ring_buffer_flags rb_flags;
7716
7717         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7718
7719         buf->tr = tr;
7720
7721         buf->buffer = ring_buffer_alloc(size, rb_flags);
7722         if (!buf->buffer)
7723                 return -ENOMEM;
7724
7725         buf->data = alloc_percpu(struct trace_array_cpu);
7726         if (!buf->data) {
7727                 ring_buffer_free(buf->buffer);
7728                 buf->buffer = NULL;
7729                 return -ENOMEM;
7730         }
7731
7732         /* Allocate the first page for all buffers */
7733         set_buffer_entries(&tr->trace_buffer,
7734                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7735
7736         return 0;
7737 }
7738
7739 static int allocate_trace_buffers(struct trace_array *tr, int size)
7740 {
7741         int ret;
7742
7743         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7744         if (ret)
7745                 return ret;
7746
7747 #ifdef CONFIG_TRACER_MAX_TRACE
7748         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7749                                     allocate_snapshot ? size : 1);
7750         if (WARN_ON(ret)) {
7751                 ring_buffer_free(tr->trace_buffer.buffer);
7752                 tr->trace_buffer.buffer = NULL;
7753                 free_percpu(tr->trace_buffer.data);
7754                 tr->trace_buffer.data = NULL;
7755                 return -ENOMEM;
7756         }
7757         tr->allocated_snapshot = allocate_snapshot;
7758
7759         /*
7760          * Only the top level trace array gets its snapshot allocated
7761          * from the kernel command line.
7762          */
7763         allocate_snapshot = false;
7764 #endif
7765
7766         /*
7767          * Because of the way alloc_percpu() works on x86_64, we need to
7768          * synchronize the pgd of all the page tables.  Otherwise a trace
7769          * event taken in an x86_64 page fault handler could itself fault
7770          * when it touches the freshly alloc_percpu()'d memory that is not
7771          * yet mapped into the current task's page tables.  All other
7772          * alloc_percpu() and vmalloc() calls in tracing need auditing for
7773          * the same reason, because something might get triggered within a
7774          * page fault trace event!
7775          */
7776         vmalloc_sync_mappings();
7777
7778         return 0;
7779 }
7780
7781 static void free_trace_buffer(struct trace_buffer *buf)
7782 {
7783         if (buf->buffer) {
7784                 ring_buffer_free(buf->buffer);
7785                 buf->buffer = NULL;
7786                 free_percpu(buf->data);
7787                 buf->data = NULL;
7788         }
7789 }
7790
7791 static void free_trace_buffers(struct trace_array *tr)
7792 {
7793         if (!tr)
7794                 return;
7795
7796         free_trace_buffer(&tr->trace_buffer);
7797
7798 #ifdef CONFIG_TRACER_MAX_TRACE
7799         free_trace_buffer(&tr->max_buffer);
7800 #endif
7801 }
7802
7803 static void init_trace_flags_index(struct trace_array *tr)
7804 {
7805         int i;
7806
7807         /* Used by the trace options files */
7808         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7809                 tr->trace_flags_index[i] = i;
7810 }
7811
7812 static void __update_tracer_options(struct trace_array *tr)
7813 {
7814         struct tracer *t;
7815
7816         for (t = trace_types; t; t = t->next)
7817                 add_tracer_options(tr, t);
7818 }
7819
7820 static void update_tracer_options(struct trace_array *tr)
7821 {
7822         mutex_lock(&trace_types_lock);
7823         __update_tracer_options(tr);
7824         mutex_unlock(&trace_types_lock);
7825 }
7826
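/*
 * instance_mkdir()/instance_rmdir() run when a directory is created or
 * removed under the tracefs "instances" directory (wired up in
 * create_trace_instances()).  Usage sketch, assuming tracefs is mounted
 * at /sys/kernel/tracing:
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # create trace array "foo"
 *   rmdir /sys/kernel/tracing/instances/foo    # tear it down again
 */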
7827 static int instance_mkdir(const char *name)
7828 {
7829         struct trace_array *tr;
7830         int ret;
7831
7832         mutex_lock(&event_mutex);
7833         mutex_lock(&trace_types_lock);
7834
7835         ret = -EEXIST;
7836         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7837                 if (tr->name && strcmp(tr->name, name) == 0)
7838                         goto out_unlock;
7839         }
7840
7841         ret = -ENOMEM;
7842         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7843         if (!tr)
7844                 goto out_unlock;
7845
7846         tr->name = kstrdup(name, GFP_KERNEL);
7847         if (!tr->name)
7848                 goto out_free_tr;
7849
7850         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7851                 goto out_free_tr;
7852
7853         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7854
7855         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7856
7857         raw_spin_lock_init(&tr->start_lock);
7858
7859         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7860
7861         tr->current_trace = &nop_trace;
7862
7863         INIT_LIST_HEAD(&tr->systems);
7864         INIT_LIST_HEAD(&tr->events);
7865         INIT_LIST_HEAD(&tr->hist_vars);
7866
7867         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7868                 goto out_free_tr;
7869
7870         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7871         if (!tr->dir)
7872                 goto out_free_tr;
7873
7874         ret = event_trace_add_tracer(tr->dir, tr);
7875         if (ret) {
7876                 tracefs_remove_recursive(tr->dir);
7877                 goto out_free_tr;
7878         }
7879
7880         ftrace_init_trace_array(tr);
7881
7882         init_tracer_tracefs(tr, tr->dir);
7883         init_trace_flags_index(tr);
7884         __update_tracer_options(tr);
7885
7886         list_add(&tr->list, &ftrace_trace_arrays);
7887
7888         mutex_unlock(&trace_types_lock);
7889         mutex_unlock(&event_mutex);
7890
7891         return 0;
7892
7893  out_free_tr:
7894         free_trace_buffers(tr);
7895         free_cpumask_var(tr->tracing_cpumask);
7896         kfree(tr->name);
7897         kfree(tr);
7898
7899  out_unlock:
7900         mutex_unlock(&trace_types_lock);
7901         mutex_unlock(&event_mutex);
7902
7903         return ret;
7904
7905 }
7906
7907 static int instance_rmdir(const char *name)
7908 {
7909         struct trace_array *tr;
7910         int found = 0;
7911         int ret;
7912         int i;
7913
7914         mutex_lock(&event_mutex);
7915         mutex_lock(&trace_types_lock);
7916
7917         ret = -ENODEV;
7918         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7919                 if (tr->name && strcmp(tr->name, name) == 0) {
7920                         found = 1;
7921                         break;
7922                 }
7923         }
7924         if (!found)
7925                 goto out_unlock;
7926
7927         ret = -EBUSY;
7928         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7929                 goto out_unlock;
7930
7931         list_del(&tr->list);
7932
7933         /* Disable all the flags that were enabled coming in */
7934         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7935                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7936                         set_tracer_flag(tr, 1 << i, 0);
7937         }
7938
7939         tracing_set_nop(tr);
7940         clear_ftrace_function_probes(tr);
7941         event_trace_del_tracer(tr);
7942         ftrace_clear_pids(tr);
7943         ftrace_destroy_function_files(tr);
7944         tracefs_remove_recursive(tr->dir);
7945         free_trace_buffers(tr);
7946
7947         for (i = 0; i < tr->nr_topts; i++) {
7948                 kfree(tr->topts[i].topts);
7949         }
7950         kfree(tr->topts);
7951
7952         free_cpumask_var(tr->tracing_cpumask);
7953         kfree(tr->name);
7954         kfree(tr);
7955
7956         ret = 0;
7957
7958  out_unlock:
7959         mutex_unlock(&trace_types_lock);
7960         mutex_unlock(&event_mutex);
7961
7962         return ret;
7963 }
7964
7965 static __init void create_trace_instances(struct dentry *d_tracer)
7966 {
7967         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7968                                                          instance_mkdir,
7969                                                          instance_rmdir);
7970         if (WARN_ON(!trace_instance_dir))
7971                 return;
7972 }
7973
7974 static void
7975 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7976 {
7977         struct trace_event_file *file;
7978         int cpu;
7979
7980         trace_create_file("available_tracers", 0444, d_tracer,
7981                         tr, &show_traces_fops);
7982
7983         trace_create_file("current_tracer", 0644, d_tracer,
7984                         tr, &set_tracer_fops);
7985
7986         trace_create_file("tracing_cpumask", 0644, d_tracer,
7987                           tr, &tracing_cpumask_fops);
7988
7989         trace_create_file("trace_options", 0644, d_tracer,
7990                           tr, &tracing_iter_fops);
7991
7992         trace_create_file("trace", 0644, d_tracer,
7993                           tr, &tracing_fops);
7994
7995         trace_create_file("trace_pipe", 0444, d_tracer,
7996                           tr, &tracing_pipe_fops);
7997
7998         trace_create_file("buffer_size_kb", 0644, d_tracer,
7999                           tr, &tracing_entries_fops);
8000
8001         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8002                           tr, &tracing_total_entries_fops);
8003
8004         trace_create_file("free_buffer", 0200, d_tracer,
8005                           tr, &tracing_free_buffer_fops);
8006
8007         trace_create_file("trace_marker", 0220, d_tracer,
8008                           tr, &tracing_mark_fops);
8009
8010         file = __find_event_file(tr, "ftrace", "print");
8011         if (file && file->dir)
8012                 trace_create_file("trigger", 0644, file->dir, file,
8013                                   &event_trigger_fops);
8014         tr->trace_marker_file = file;
8015
8016         trace_create_file("trace_marker_raw", 0220, d_tracer,
8017                           tr, &tracing_mark_raw_fops);
8018
8019         trace_create_file("trace_clock", 0644, d_tracer, tr,
8020                           &trace_clock_fops);
8021
8022         trace_create_file("tracing_on", 0644, d_tracer,
8023                           tr, &rb_simple_fops);
8024
8025         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8026                           &trace_time_stamp_mode_fops);
8027
8028         create_trace_options_dir(tr);
8029
8030 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8031         trace_create_file("tracing_max_latency", 0644, d_tracer,
8032                         &tr->max_latency, &tracing_max_lat_fops);
8033 #endif
8034
8035         if (ftrace_create_function_files(tr, d_tracer))
8036                 WARN(1, "Could not allocate function filter files");
8037
8038 #ifdef CONFIG_TRACER_SNAPSHOT
8039         trace_create_file("snapshot", 0644, d_tracer,
8040                           tr, &snapshot_fops);
8041 #endif
8042
8043         for_each_tracing_cpu(cpu)
8044                 tracing_init_tracefs_percpu(tr, cpu);
8045
8046         ftrace_init_tracefs(tr, d_tracer);
8047 }
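
/*
 * The files created above exist once per trace array: at the top level
 * of the mount and inside every instance directory.  Illustrative usage,
 * assuming tracefs is mounted at /sys/kernel/tracing, an instance named
 * "foo" exists, and the function tracer is built in:
 *
 *   echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   cat /sys/kernel/tracing/instances/foo/trace_pipe
 */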
8048
8049 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8050 {
8051         struct vfsmount *mnt;
8052         struct file_system_type *type;
8053
8054         /*
8055          * To maintain backward compatibility for tools that mount
8056          * debugfs to get to the tracing facility, tracefs is automatically
8057          * mounted to the debugfs/tracing directory.
8058          */
8059         type = get_fs_type("tracefs");
8060         if (!type)
8061                 return NULL;
8062         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8063         put_filesystem(type);
8064         if (IS_ERR(mnt))
8065                 return NULL;
8066         mntget(mnt);
8067
8068         return mnt;
8069 }
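
/*
 * With the automount above in place, both of these (illustrative) paths
 * end up in the same tracefs instance:
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 *   ls /sys/kernel/tracing/
 *
 *   mount -t debugfs nodev /sys/kernel/debug
 *   ls /sys/kernel/debug/tracing/    # tracefs automounted here
 */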
8070
8071 /**
8072  * tracing_init_dentry - initialize top level trace array
8073  *
8074  * This is called when creating files or directories in the tracing
8075  * directory. It is called via fs_initcall() by the boot-up code
8076  * and expects to return the dentry of the top level tracing directory.
8077  */
8078 struct dentry *tracing_init_dentry(void)
8079 {
8080         struct trace_array *tr = &global_trace;
8081
8082         /* The top level trace array uses NULL as parent */
8083         if (tr->dir)
8084                 return NULL;
8085
8086         if (WARN_ON(!tracefs_initialized()) ||
8087                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8088                  WARN_ON(!debugfs_initialized())))
8089                 return ERR_PTR(-ENODEV);
8090
8091         /*
8092          * As there may still be users that expect the tracing
8093          * files to exist in debugfs/tracing, we must automount
8094          * the tracefs file system there, so older tools still
8095          * work with the newer kernel.
8096          */
8097         tr->dir = debugfs_create_automount("tracing", NULL,
8098                                            trace_automount, NULL);
8099         if (!tr->dir) {
8100                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8101                 return ERR_PTR(-ENOMEM);
8102         }
8103
8104         return NULL;
8105 }
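
/*
 * Note: a NULL return from tracing_init_dentry() means "create files
 * relative to the tracefs root" (a NULL parent); callers treat only an
 * ERR_PTR() value as failure (see tracer_init_tracefs() below, which
 * checks IS_ERR() and then passes the possibly-NULL dentry on).
 */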
8106
8107 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8108 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8109
8110 static void __init trace_eval_init(void)
8111 {
8112         int len;
8113
8114         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8115         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8116 }
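
/*
 * The linker section walked above is, roughly, filled by users of
 * TRACE_DEFINE_ENUM(), e.g. (illustrative):
 *
 *   TRACE_DEFINE_ENUM(MY_ENUM_VALUE);
 *
 * The resulting eval maps let the symbolic name in an event's print
 * format be translated to its numeric value when the "format" file is
 * read, so userspace parsers do not need to know kernel enums.
 */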
8117
8118 #ifdef CONFIG_MODULES
8119 static void trace_module_add_evals(struct module *mod)
8120 {
8121         if (!mod->num_trace_evals)
8122                 return;
8123
8124         /*
8125          * Modules with bad taint do not have events created, so
8126          * do not bother with enums either.
8127          */
8128         if (trace_module_has_bad_taint(mod))
8129                 return;
8130
8131         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8132 }
8133
8134 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8135 static void trace_module_remove_evals(struct module *mod)
8136 {
8137         union trace_eval_map_item *map;
8138         union trace_eval_map_item **last = &trace_eval_maps;
8139
8140         if (!mod->num_trace_evals)
8141                 return;
8142
8143         mutex_lock(&trace_eval_mutex);
8144
8145         map = trace_eval_maps;
8146
8147         while (map) {
8148                 if (map->head.mod == mod)
8149                         break;
8150                 map = trace_eval_jmp_to_tail(map);
8151                 last = &map->tail.next;
8152                 map = map->tail.next;
8153         }
8154         if (!map)
8155                 goto out;
8156
8157         *last = trace_eval_jmp_to_tail(map)->tail.next;
8158         kfree(map);
8159  out:
8160         mutex_unlock(&trace_eval_mutex);
8161 }
8162 #else
8163 static inline void trace_module_remove_evals(struct module *mod) { }
8164 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8165
8166 static int trace_module_notify(struct notifier_block *self,
8167                                unsigned long val, void *data)
8168 {
8169         struct module *mod = data;
8170
8171         switch (val) {
8172         case MODULE_STATE_COMING:
8173                 trace_module_add_evals(mod);
8174                 break;
8175         case MODULE_STATE_GOING:
8176                 trace_module_remove_evals(mod);
8177                 break;
8178         }
8179
8180         return 0;
8181 }
8182
8183 static struct notifier_block trace_module_nb = {
8184         .notifier_call = trace_module_notify,
8185         .priority = 0,
8186 };
8187 #endif /* CONFIG_MODULES */
8188
8189 static __init int tracer_init_tracefs(void)
8190 {
8191         struct dentry *d_tracer;
8192
8193         trace_access_lock_init();
8194
8195         d_tracer = tracing_init_dentry();
8196         if (IS_ERR(d_tracer))
8197                 return 0;
8198
8199         event_trace_init();
8200
8201         init_tracer_tracefs(&global_trace, d_tracer);
8202         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8203
8204         trace_create_file("tracing_thresh", 0644, d_tracer,
8205                         &global_trace, &tracing_thresh_fops);
8206
8207         trace_create_file("README", 0444, d_tracer,
8208                         NULL, &tracing_readme_fops);
8209
8210         trace_create_file("saved_cmdlines", 0444, d_tracer,
8211                         NULL, &tracing_saved_cmdlines_fops);
8212
8213         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8214                           NULL, &tracing_saved_cmdlines_size_fops);
8215
8216         trace_create_file("saved_tgids", 0444, d_tracer,
8217                         NULL, &tracing_saved_tgids_fops);
8218
8219         trace_eval_init();
8220
8221         trace_create_eval_file(d_tracer);
8222
8223 #ifdef CONFIG_MODULES
8224         register_module_notifier(&trace_module_nb);
8225 #endif
8226
8227 #ifdef CONFIG_DYNAMIC_FTRACE
8228         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8229                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8230 #endif
8231
8232         create_trace_instances(d_tracer);
8233
8234         update_tracer_options(&global_trace);
8235
8236         return 0;
8237 }
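
/*
 * The files created directly above (README, saved_cmdlines, the
 * "instances" directory, ...) exist only at the top level of the mount,
 * e.g. (illustrative, assuming tracefs at /sys/kernel/tracing):
 *
 *   cat /sys/kernel/tracing/README
 *   cat /sys/kernel/tracing/saved_cmdlines
 */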
8238
8239 static int trace_panic_handler(struct notifier_block *this,
8240                                unsigned long event, void *unused)
8241 {
8242         if (ftrace_dump_on_oops)
8243                 ftrace_dump(ftrace_dump_on_oops);
8244         return NOTIFY_OK;
8245 }
8246
8247 static struct notifier_block trace_panic_notifier = {
8248         .notifier_call  = trace_panic_handler,
8249         .next           = NULL,
8250         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8251 };
8252
8253 static int trace_die_handler(struct notifier_block *self,
8254                              unsigned long val,
8255                              void *data)
8256 {
8257         switch (val) {
8258         case DIE_OOPS:
8259                 if (ftrace_dump_on_oops)
8260                         ftrace_dump(ftrace_dump_on_oops);
8261                 break;
8262         default:
8263                 break;
8264         }
8265         return NOTIFY_OK;
8266 }
8267
8268 static struct notifier_block trace_die_notifier = {
8269         .notifier_call = trace_die_handler,
8270         .priority = 200
8271 };
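
/*
 * The two notifiers above only dump when ftrace_dump_on_oops is set.
 * That is typically done with the "ftrace_dump_on_oops" kernel command
 * line option, or at run time via sysctl, e.g. (illustrative):
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */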
8272
8273 /*
8274  * printk is set to a max of 1024; we really don't need it that big.
8275  * Nothing should be printing 1000 characters anyway.
8276  */
8277 #define TRACE_MAX_PRINT         1000
8278
8279 /*
8280  * Define here KERN_TRACE so that we have one place to modify
8281  * it if we decide to change what log level the ftrace dump
8282  * should be at.
8283  */
8284 #define KERN_TRACE              KERN_EMERG
8285
8286 void
8287 trace_printk_seq(struct trace_seq *s)
8288 {
8289         /* Probably should print a warning here. */
8290         if (s->seq.len >= TRACE_MAX_PRINT)
8291                 s->seq.len = TRACE_MAX_PRINT;
8292
8293         /*
8294          * More paranoid code. Although the buffer size is set to
8295          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8296          * an extra layer of protection.
8297          */
8298         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8299                 s->seq.len = s->seq.size - 1;
8300
8301         /* Should already be NUL-terminated, but we are paranoid. */
8302         s->buffer[s->seq.len] = 0;
8303
8304         printk(KERN_TRACE "%s", s->buffer);
8305
8306         trace_seq_init(s);
8307 }
8308
8309 void trace_init_global_iter(struct trace_iterator *iter)
8310 {
8311         iter->tr = &global_trace;
8312         iter->trace = iter->tr->current_trace;
8313         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8314         iter->trace_buffer = &global_trace.trace_buffer;
8315
8316         if (iter->trace && iter->trace->open)
8317                 iter->trace->open(iter);
8318
8319         /* Annotate start of buffers if we had overruns */
8320         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8321                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8322
8323         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8324         if (trace_clocks[iter->tr->clock_id].in_ns)
8325                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8326 }
8327
8328 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8329 {
8330         /* use static because iter can be a bit big for the stack */
8331         static struct trace_iterator iter;
8332         static atomic_t dump_running;
8333         struct trace_array *tr = &global_trace;
8334         unsigned int old_userobj;
8335         unsigned long flags;
8336         int cnt = 0, cpu;
8337
8338         /* Only allow one dump user at a time. */
8339         if (atomic_inc_return(&dump_running) != 1) {
8340                 atomic_dec(&dump_running);
8341                 return;
8342         }
8343
8344         /*
8345          * Always turn off tracing when we dump.
8346          * We don't need to show trace output of what happens
8347          * between multiple crashes.
8348          *
8349          * If the user does a sysrq-z, then they can re-enable
8350          * tracing with echo 1 > tracing_on.
8351          */
8352         tracing_off();
8353
8354         local_irq_save(flags);
8355         printk_nmi_direct_enter();
8356
8357         /* Simulate the iterator */
8358         trace_init_global_iter(&iter);
8359
8360         for_each_tracing_cpu(cpu) {
8361                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8362         }
8363
8364         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8365
8366         /* don't look at user memory in panic mode */
8367         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8368
8369         switch (oops_dump_mode) {
8370         case DUMP_ALL:
8371                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8372                 break;
8373         case DUMP_ORIG:
8374                 iter.cpu_file = raw_smp_processor_id();
8375                 break;
8376         case DUMP_NONE:
8377                 goto out_enable;
8378         default:
8379                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8380                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8381         }
8382
8383         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8384
8385         /* Did function tracer already get disabled? */
8386         if (ftrace_is_dead()) {
8387                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8388                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8389         }
8390
8391         /*
8392          * We need to stop all tracing on all CPUs to read
8393          * the next buffer. This is a bit expensive, but is
8394          * not done often. We read out everything we can,
8395          * and then release the locks again.
8396          */
8397
8398         while (!trace_empty(&iter)) {
8399
8400                 if (!cnt)
8401                         printk(KERN_TRACE "---------------------------------\n");
8402
8403                 cnt++;
8404
8405                 trace_iterator_reset(&iter);
8406                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8407
8408                 if (trace_find_next_entry_inc(&iter) != NULL) {
8409                         int ret;
8410
8411                         ret = print_trace_line(&iter);
8412                         if (ret != TRACE_TYPE_NO_CONSUME)
8413                                 trace_consume(&iter);
8414                 }
8415                 touch_nmi_watchdog();
8416
8417                 trace_printk_seq(&iter.seq);
8418         }
8419
8420         if (!cnt)
8421                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8422         else
8423                 printk(KERN_TRACE "---------------------------------\n");
8424
8425  out_enable:
8426         tr->trace_flags |= old_userobj;
8427
8428         for_each_tracing_cpu(cpu) {
8429                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8430         }
8431         atomic_dec(&dump_running);
8432         printk_nmi_direct_exit();
8433         local_irq_restore(flags);
8434 }
8435 EXPORT_SYMBOL_GPL(ftrace_dump);
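
/*
 * Besides the panic/die notifiers, ftrace_dump() can be triggered by
 * hand through the magic SysRq 'z' key, e.g. (illustrative):
 *
 *   echo z > /proc/sysrq-trigger
 */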
8436
8437 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8438 {
8439         char **argv;
8440         int argc, ret;
8441
8442         argc = 0;
8443         ret = 0;
8444         argv = argv_split(GFP_KERNEL, buf, &argc);
8445         if (!argv)
8446                 return -ENOMEM;
8447
8448         if (argc)
8449                 ret = createfn(argc, argv);
8450
8451         argv_free(argv);
8452
8453         return ret;
8454 }
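
/*
 * Example (illustrative): for a command line such as
 *
 *   "p:myprobe do_sys_open"
 *
 * argv_split() above produces argc == 2 and argv == { "p:myprobe",
 * "do_sys_open" }, which is then handed to the caller's createfn().
 */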
8455
8456 #define WRITE_BUFSIZE  4096
8457
8458 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8459                                 size_t count, loff_t *ppos,
8460                                 int (*createfn)(int, char **))
8461 {
8462         char *kbuf, *buf, *tmp;
8463         int ret = 0;
8464         size_t done = 0;
8465         size_t size;
8466
8467         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8468         if (!kbuf)
8469                 return -ENOMEM;
8470
8471         while (done < count) {
8472                 size = count - done;
8473
8474                 if (size >= WRITE_BUFSIZE)
8475                         size = WRITE_BUFSIZE - 1;
8476
8477                 if (copy_from_user(kbuf, buffer + done, size)) {
8478                         ret = -EFAULT;
8479                         goto out;
8480                 }
8481                 kbuf[size] = '\0';
8482                 buf = kbuf;
8483                 do {
8484                         tmp = strchr(buf, '\n');
8485                         if (tmp) {
8486                                 *tmp = '\0';
8487                                 size = tmp - buf + 1;
8488                         } else {
8489                                 size = strlen(buf);
8490                                 if (done + size < count) {
8491                                         if (buf != kbuf)
8492                                                 break;
8493                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8494                                         pr_warn("Line length is too long: Should be less than %d\n",
8495                                                 WRITE_BUFSIZE - 2);
8496                                         ret = -EINVAL;
8497                                         goto out;
8498                                 }
8499                         }
8500                         done += size;
8501
8502                         /* Remove comments */
8503                         tmp = strchr(buf, '#');
8504
8505                         if (tmp)
8506                                 *tmp = '\0';
8507
8508                         ret = trace_run_command(buf, createfn);
8509                         if (ret)
8510                                 goto out;
8511                         buf += size;
8512
8513                 } while (done < count);
8514         }
8515         ret = done;
8516
8517 out:
8518         kfree(kbuf);
8519
8520         return ret;
8521 }
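
/*
 * trace_parse_run_command() is the write() helper behind interfaces
 * such as kprobe_events (when kprobe events are configured in): it
 * splits the user buffer into newline-separated commands, strips '#'
 * comments, and feeds each line to trace_run_command() above, e.g.
 * (illustrative, assuming tracefs at /sys/kernel/tracing):
 *
 *   echo 'p:myprobe do_sys_open' >> /sys/kernel/tracing/kprobe_events
 */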
8522
8523 __init static int tracer_alloc_buffers(void)
8524 {
8525         int ring_buf_size;
8526         int ret = -ENOMEM;
8527
8528         /*
8529          * Make sure we don't accidentally add more trace options
8530          * than we have bits for.
8531          */
8532         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8533
8534         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8535                 goto out;
8536
8537         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8538                 goto out_free_buffer_mask;
8539
8540         /* Only allocate trace_printk buffers if a trace_printk exists */
8541         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
8542                 /* Must be called before global_trace.buffer is allocated */
8543                 trace_printk_init_buffers();
8544
8545         /* To save memory, keep the ring buffer size to its minimum */
8546         if (ring_buffer_expanded)
8547                 ring_buf_size = trace_buf_size;
8548         else
8549                 ring_buf_size = 1;
8550
8551         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8552         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8553
8554         raw_spin_lock_init(&global_trace.start_lock);
8555
8556         /*
8557          * The prepare callback allocates some memory for the ring buffer. We
8558          * don't free the buffer if the CPU goes down. If we were to free
8559          * the buffer, then the user would lose any trace that was in the
8560          * buffer. The memory will be removed once the "instance" is removed.
8561          */
8562         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8563                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8564                                       NULL);
8565         if (ret < 0)
8566                 goto out_free_cpumask;
8567         /* Used for event triggers */
8568         ret = -ENOMEM;
8569         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8570         if (!temp_buffer)
8571                 goto out_rm_hp_state;
8572
8573         if (trace_create_savedcmd() < 0)
8574                 goto out_free_temp_buffer;
8575
8576         /* TODO: make the number of buffers hot pluggable with CPUs */
8577         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8578                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8579                 WARN_ON(1);
8580                 goto out_free_savedcmd;
8581         }
8582
8583         if (global_trace.buffer_disabled)
8584                 tracing_off();
8585
8586         if (trace_boot_clock) {
8587                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8588                 if (ret < 0)
8589                         pr_warn("Trace clock %s not defined, going back to default\n",
8590                                 trace_boot_clock);
8591         }
8592
8593         /*
8594          * register_tracer() might reference current_trace, so it
8595          * needs to be set before we register anything. This is
8596          * just a bootstrap of current_trace anyway.
8597          */
8598         global_trace.current_trace = &nop_trace;
8599
8600         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8601
8602         ftrace_init_global_array_ops(&global_trace);
8603
8604         init_trace_flags_index(&global_trace);
8605
8606         register_tracer(&nop_trace);
8607
8608         /* Function tracing may start here (via kernel command line) */
8609         init_function_trace();
8610
8611         /* All seems OK, enable tracing */
8612         tracing_disabled = 0;
8613
8614         atomic_notifier_chain_register(&panic_notifier_list,
8615                                        &trace_panic_notifier);
8616
8617         register_die_notifier(&trace_die_notifier);
8618
8619         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8620
8621         INIT_LIST_HEAD(&global_trace.systems);
8622         INIT_LIST_HEAD(&global_trace.events);
8623         INIT_LIST_HEAD(&global_trace.hist_vars);
8624         list_add(&global_trace.list, &ftrace_trace_arrays);
8625
8626         apply_trace_boot_options();
8627
8628         register_snapshot_cmd();
8629
8630         return 0;
8631
8632 out_free_savedcmd:
8633         free_saved_cmdlines_buffer(savedcmd);
8634 out_free_temp_buffer:
8635         ring_buffer_free(temp_buffer);
8636 out_rm_hp_state:
8637         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8638 out_free_cpumask:
8639         free_cpumask_var(global_trace.tracing_cpumask);
8640 out_free_buffer_mask:
8641         free_cpumask_var(tracing_buffer_mask);
8642 out:
8643         return ret;
8644 }
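
/*
 * Note that the per-CPU ring buffers allocated here start out at a token
 * size (see the ring_buffer_expanded check above).  They are expanded to
 * the full trace_buf_size when tracing is first used, or explicitly,
 * e.g. (illustrative, assuming tracefs at /sys/kernel/tracing):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 */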
8645
8646 void __init early_trace_init(void)
8647 {
8648         if (tracepoint_printk) {
8649                 tracepoint_print_iter =
8650                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8651                 if (WARN_ON(!tracepoint_print_iter))
8652                         tracepoint_printk = 0;
8653                 else
8654                         static_key_enable(&tracepoint_printk_key.key);
8655         }
8656         tracer_alloc_buffers();
8657 }
8658
8659 void __init trace_init(void)
8660 {
8661         trace_event_init();
8662 }
8663
8664 __init static int clear_boot_tracer(void)
8665 {
8666         /*
8667          * The buffer holding the default bootup tracer name lives in
8668          * an init section. This function is called at late_initcall
8669          * time; if the boot tracer was never found, clear the pointer
8670          * so that a later tracer registration does not access the
8671          * buffer that is about to be freed.
8672          */
8673         if (!default_bootup_tracer)
8674                 return 0;
8675
8676         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8677                default_bootup_tracer);
8678         default_bootup_tracer = NULL;
8679
8680         return 0;
8681 }
8682
8683 fs_initcall(tracer_init_tracefs);
8684 late_initcall_sync(clear_boot_tracer);
8685
8686 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8687 __init static int tracing_set_default_clock(void)
8688 {
8689         /* sched_clock_stable() is determined in late_initcall */
8690         if (!trace_boot_clock && !sched_clock_stable()) {
8691                 printk(KERN_WARNING
8692                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8693                        "If you want to keep using the local clock, then add:\n"
8694                        "  \"trace_clock=local\"\n"
8695                        "on the kernel command line\n");
8696                 tracing_set_clock(&global_trace, "global");
8697         }
8698
8699         return 0;
8700 }
8701 late_initcall_sync(tracing_set_default_clock);
8702 #endif
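
/*
 * Whatever default is chosen here, the clock can still be changed at run
 * time through the trace_clock file, e.g. (illustrative, assuming
 * tracefs at /sys/kernel/tracing):
 *
 *   echo global > /sys/kernel/tracing/trace_clock
 */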