GNU Linux-libre 5.15.137-gnu
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing (tracers/events enabled via the kernel cmdline)
73  * is running, we do not want to run the SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurs.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is set back to zero only when the
114  * initialization of the tracer succeeds. That is the only place
115  * that clears it.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
125  * is set, then ftrace_dump() is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
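/*
 * Example (editorial sketch, not part of the original source): a minimal
 * notifier that consumes this flag, loosely modeled on the panic/die
 * notifiers defined later in this file. The notifier name is hypothetical.
 */
static int example_dump_on_oops_notify(struct notifier_block *self,
				       unsigned long ev, void *unused)
{
	/* DUMP_ALL dumps every CPU's buffer, DUMP_ORIG only the oopsing CPU */
	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);
	return NOTIFY_OK;
}

static struct notifier_block example_dump_on_oops_block __maybe_unused = {
	.notifier_call = example_dump_on_oops_notify,
};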
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
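/*
 * Example (editorial sketch, not part of the original source): walking
 * the saved eval maps described above. Each saved block is laid out as
 *   [ head | map[0] ... map[length - 1] | tail ]
 * where head.length is the number of maps and tail.next points to the
 * head item of the next saved block (or NULL). The helper below is
 * hypothetical and assumes trace_eval_mutex is held by the caller.
 */
static void __maybe_unused example_walk_eval_maps(void)
{
	union trace_eval_map_item *ptr = trace_eval_maps;

	while (ptr) {
		unsigned long len = ptr->head.length;
		unsigned long i;

		/* the map entries follow directly after the head item */
		for (i = 1; i <= len; i++)
			pr_debug("eval %s = %lu\n",
				 ptr[i].map.eval_string,
				 ptr[i].map.eval_value);

		/* the tail item links to the next saved block */
		ptr = ptr[len + 1].tail.next;
	}
}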
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 1;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 1;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         /* Ignore the "tp_printk_stop_on_boot" param */
256         if (*str == '_')
257                 return 0;
258
259         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260                 tracepoint_printk = 1;
261         return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267         tracepoint_printk_stop_on_boot = true;
268         return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
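/*
 * Example (editorial note): the boot parameters handled above can be
 * combined on the kernel command line, for instance (the tracer and
 * option names here are only illustrative values):
 *
 *   ftrace=function trace_options=sym-addr trace_clock=global \
 *     trace_buf_size=16384 alloc_snapshot ftrace_dump_on_oops=orig_cpu \
 *     traceoff_on_warning tp_printk
 */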
271
272 unsigned long long ns2usecs(u64 nsec)
273 {
274         nsec += 500;
275         do_div(nsec, 1000);
276         return nsec;
277 }
278
279 static void
280 trace_process_export(struct trace_export *export,
281                struct ring_buffer_event *event, int flag)
282 {
283         struct trace_entry *entry;
284         unsigned int size = 0;
285
286         if (export->flags & flag) {
287                 entry = ring_buffer_event_data(event);
288                 size = ring_buffer_event_length(event);
289                 export->write(export, entry, size);
290         }
291 }
292
293 static DEFINE_MUTEX(ftrace_export_lock);
294
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303         if (export->flags & TRACE_EXPORT_FUNCTION)
304                 static_branch_inc(&trace_function_exports_enabled);
305
306         if (export->flags & TRACE_EXPORT_EVENT)
307                 static_branch_inc(&trace_event_exports_enabled);
308
309         if (export->flags & TRACE_EXPORT_MARKER)
310                 static_branch_inc(&trace_marker_exports_enabled);
311 }
312
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315         if (export->flags & TRACE_EXPORT_FUNCTION)
316                 static_branch_dec(&trace_function_exports_enabled);
317
318         if (export->flags & TRACE_EXPORT_EVENT)
319                 static_branch_dec(&trace_event_exports_enabled);
320
321         if (export->flags & TRACE_EXPORT_MARKER)
322                 static_branch_dec(&trace_marker_exports_enabled);
323 }
324
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327         struct trace_export *export;
328
329         preempt_disable_notrace();
330
331         export = rcu_dereference_raw_check(ftrace_exports_list);
332         while (export) {
333                 trace_process_export(export, event, flag);
334                 export = rcu_dereference_raw_check(export->next);
335         }
336
337         preempt_enable_notrace();
338 }
339
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         rcu_assign_pointer(export->next, *list);
344         /*
345          * We are adding export to the list, but another
346          * CPU might be walking that list. We need to make sure
347          * the export->next pointer is visible before another CPU
348          * sees the export pointer itself in the list.
349          */
350         rcu_assign_pointer(*list, export);
351 }
352
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356         struct trace_export **p;
357
358         for (p = list; *p != NULL; p = &(*p)->next)
359                 if (*p == export)
360                         break;
361
362         if (*p != export)
363                 return -1;
364
365         rcu_assign_pointer(*p, (*p)->next);
366
367         return 0;
368 }
369
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373         ftrace_exports_enable(export);
374
375         add_trace_export(list, export);
376 }
377
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381         int ret;
382
383         ret = rm_trace_export(list, export);
384         ftrace_exports_disable(export);
385
386         return ret;
387 }
388
389 int register_ftrace_export(struct trace_export *export)
390 {
391         if (WARN_ON_ONCE(!export->write))
392                 return -1;
393
394         mutex_lock(&ftrace_export_lock);
395
396         add_ftrace_export(&ftrace_exports_list, export);
397
398         mutex_unlock(&ftrace_export_lock);
399
400         return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406         int ret;
407
408         mutex_lock(&ftrace_export_lock);
409
410         ret = rm_ftrace_export(&ftrace_exports_list, export);
411
412         mutex_unlock(&ftrace_export_lock);
413
414         return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
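/*
 * Example (editorial sketch, not part of the original source): a minimal
 * user of the export API above. A module fills in a struct trace_export
 * with a ->write() callback and the event classes it wants
 * (TRACE_EXPORT_FUNCTION/_EVENT/_MARKER), then registers it. The names
 * and the callback body below are hypothetical.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* forward the raw trace entry somewhere, e.g. over a trace bus */
	pr_debug("exporting %u bytes of trace data\n", size);
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_MARKER,
};

/*
 * A user would then call register_ftrace_export(&example_export) at init
 * time and unregister_ftrace_export(&example_export) on teardown.
 */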
417
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS                                             \
420         (FUNCTION_DEFAULT_FLAGS |                                       \
421          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
422          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
423          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
424          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
425          TRACE_ITER_HASH_PTR)
426
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
429                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434
435 /*
436  * The global_trace is the descriptor that holds the top-level tracing
437  * buffers for the live tracing.
438  */
439 static struct trace_array global_trace = {
440         .trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442
443 LIST_HEAD(ftrace_trace_arrays);
444
445 int trace_array_get(struct trace_array *this_tr)
446 {
447         struct trace_array *tr;
448         int ret = -ENODEV;
449
450         mutex_lock(&trace_types_lock);
451         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452                 if (tr == this_tr) {
453                         tr->ref++;
454                         ret = 0;
455                         break;
456                 }
457         }
458         mutex_unlock(&trace_types_lock);
459
460         return ret;
461 }
462
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465         WARN_ON(!this_tr->ref);
466         this_tr->ref--;
467 }
468
469 /**
470  * trace_array_put - Decrement the reference counter for this trace array.
471  * @this_tr : pointer to the trace array
472  *
473  * NOTE: Use this when we no longer need the trace array returned by
474  * trace_array_get_by_name(). This ensures the trace array can be later
475  * destroyed.
476  *
477  */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480         if (!this_tr)
481                 return;
482
483         mutex_lock(&trace_types_lock);
484         __trace_array_put(this_tr);
485         mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491         int ret;
492
493         ret = security_locked_down(LOCKDOWN_TRACEFS);
494         if (ret)
495                 return ret;
496
497         if (tracing_disabled)
498                 return -ENODEV;
499
500         if (tr && trace_array_get(tr) < 0)
501                 return -ENODEV;
502
503         return 0;
504 }
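/*
 * Example (editorial sketch, not part of the original source): the
 * typical pattern used by the tracefs ->open()/->release() callbacks
 * further down in this file. The function names below are hypothetical.
 */
static int example_open_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	/* checks lockdown and tracing_disabled, and takes a reference on tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int example_release_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* drop the reference taken at open time */
	trace_array_put(tr);
	return 0;
}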
505
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507                               struct trace_buffer *buffer,
508                               struct ring_buffer_event *event)
509 {
510         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511             !filter_match_preds(call->filter, rec)) {
512                 __trace_event_discard_commit(buffer, event);
513                 return 1;
514         }
515
516         return 0;
517 }
518
519 /**
520  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
521  * @filtered_pids: The list of pids to check
522  * @search_pid: The PID to find in @filtered_pids
523  *
524  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
525  */
526 bool
527 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
528 {
529         return trace_pid_list_is_set(filtered_pids, search_pid);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
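/*
 * Example (editorial sketch, not part of the original source): how a
 * scheduling hook might use the helpers above to decide whether to
 * trace a task. The probe name and pid lists are hypothetical; the real
 * users are the pid-filtering sched_switch probes in the event and
 * function tracing code.
 */
static void __maybe_unused
example_sched_switch_probe(struct trace_pid_list *pid_list,
			   struct trace_pid_list *no_pid_list,
			   struct task_struct *next)
{
	if (trace_ignore_this_task(pid_list, no_pid_list, next))
		return;		/* @next is filtered out, do not trace it */

	/* ... record the event for @next here ... */
}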
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* "self" is set for forks, and NULL for exits */
587         if (self)
588                 trace_pid_list_set(pid_list, task->pid);
589         else
590                 trace_pid_list_clear(pid_list, task->pid);
591 }
592
593 /**
594  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
595  * @pid_list: The pid list to show
596  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
597  * @pos: The position of the file
598  *
599  * This is used by the seq_file "next" operation to iterate the pids
600  * listed in a trace_pid_list structure.
601  *
602  * Returns the pid+1 as we want to display pid of zero, but NULL would
603  * stop the iteration.
604  */
605 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
606 {
607         long pid = (unsigned long)v;
608         unsigned int next;
609
610         (*pos)++;
611
612         /* pid already is +1 of the actual previous bit */
613         if (trace_pid_list_next(pid_list, pid, &next) < 0)
614                 return NULL;
615
616         pid = next;
617
618         /* Return pid + 1 to allow zero to be represented */
619         return (void *)(pid + 1);
620 }
621
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635         unsigned long pid;
636         unsigned int first;
637         loff_t l = 0;
638
639         if (trace_pid_list_first(pid_list, &first) < 0)
640                 return NULL;
641
642         pid = first;
643
644         /* Return pid + 1 so that zero can be the exit value */
645         for (pid++; pid && l < *pos;
646              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
647                 ;
648         return (void *)pid;
649 }
650
651 /**
652  * trace_pid_show - show the current pid in seq_file processing
653  * @m: The seq_file structure to write into
654  * @v: A void pointer of the pid (+1) value to display
655  *
656  * Can be directly used by seq_file operations to display the current
657  * pid value.
658  */
659 int trace_pid_show(struct seq_file *m, void *v)
660 {
661         unsigned long pid = (unsigned long)v - 1;
662
663         seq_printf(m, "%lu\n", pid);
664         return 0;
665 }
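/*
 * Example (editorial sketch, not part of the original source): a
 * seq_file can be wired on top of the three helpers above to expose a
 * pid list in tracefs. The wrappers and ops name are hypothetical; the
 * real users (such as the set_ftrace_pid and set_event_pid files) also
 * add locking and RCU access to the pid list around these calls.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops __maybe_unused = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};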
666
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE            127
669
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671                     struct trace_pid_list **new_pid_list,
672                     const char __user *ubuf, size_t cnt)
673 {
674         struct trace_pid_list *pid_list;
675         struct trace_parser parser;
676         unsigned long val;
677         int nr_pids = 0;
678         ssize_t read = 0;
679         ssize_t ret;
680         loff_t pos;
681         pid_t pid;
682
683         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
684                 return -ENOMEM;
685
686         /*
687          * Always create a new array. The write is an all-or-nothing
688          * operation: a new array is built whenever the user adds
689          * pids. If the operation fails, the current list is left
690          * unmodified.
691          */
692         pid_list = trace_pid_list_alloc();
693         if (!pid_list) {
694                 trace_parser_put(&parser);
695                 return -ENOMEM;
696         }
697
698         if (filtered_pids) {
699                 /* copy the current bits to the new max */
700                 ret = trace_pid_list_first(filtered_pids, &pid);
701                 while (!ret) {
702                         trace_pid_list_set(pid_list, pid);
703                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
704                         nr_pids++;
705                 }
706         }
707
708         ret = 0;
709         while (cnt > 0) {
710
711                 pos = 0;
712
713                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
714                 if (ret < 0)
715                         break;
716
717                 read += ret;
718                 ubuf += ret;
719                 cnt -= ret;
720
721                 if (!trace_parser_loaded(&parser))
722                         break;
723
724                 ret = -EINVAL;
725                 if (kstrtoul(parser.buffer, 0, &val))
726                         break;
727
728                 pid = (pid_t)val;
729
730                 if (trace_pid_list_set(pid_list, pid) < 0) {
731                         ret = -1;
732                         break;
733                 }
734                 nr_pids++;
735
736                 trace_parser_clear(&parser);
737                 ret = 0;
738         }
739         trace_parser_put(&parser);
740
741         if (ret < 0) {
742                 trace_pid_list_free(pid_list);
743                 return ret;
744         }
745
746         if (!nr_pids) {
747                 /* Cleared the list of pids */
748                 trace_pid_list_free(pid_list);
749                 pid_list = NULL;
750         }
751
752         *new_pid_list = pid_list;
753
754         return read;
755 }
756
757 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
758 {
759         u64 ts;
760
761         /* Early boot up does not have a buffer yet */
762         if (!buf->buffer)
763                 return trace_clock_local();
764
765         ts = ring_buffer_time_stamp(buf->buffer);
766         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
767
768         return ts;
769 }
770
771 u64 ftrace_now(int cpu)
772 {
773         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
774 }
775
776 /**
777  * tracing_is_enabled - Show if global_trace has been enabled
778  *
779  * Shows if the global trace has been enabled or not. It uses the
780  * mirror flag "buffer_disabled" to be used in fast paths such as for
781  * the irqsoff tracer. But it may be inaccurate due to races. If you
782  * need to know the accurate state, use tracing_is_on() which is a little
783  * slower, but accurate.
784  */
785 int tracing_is_enabled(void)
786 {
787         /*
788          * For quick access (irqsoff uses this in fast path), just
789          * return the mirror variable of the state of the ring buffer.
790          * It's a little racy, but we don't really care.
791          */
792         smp_rmb();
793         return !global_trace.buffer_disabled;
794 }
795
796 /*
797  * trace_buf_size is the size in bytes that is allocated
798  * for a buffer. Note, the number of bytes is always rounded
799  * to page size.
800  *
801  * This number is purposely set to a low value of 16384 entries.
802  * If a dump on oops happens, it is much appreciated not to have
803  * to wait for all that output. In any case, this is configurable
804  * at both boot time and run time.
805  */
806 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
807
808 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
809
810 /* trace_types holds a link list of available tracers. */
811 static struct tracer            *trace_types __read_mostly;
812
813 /*
814  * trace_types_lock is used to protect the trace_types list.
815  */
816 DEFINE_MUTEX(trace_types_lock);
817
818 /*
819  * Serialize access to the ring buffer.
820  *
821  * The ring buffer serializes readers, but that is only low-level protection.
822  * The validity of the events (returned by ring_buffer_peek(), etc.)
823  * is not protected by the ring buffer.
824  *
825  * The content of events may become garbage if we allow another process to
826  * consume these events concurrently:
827  *   A) the page of the consumed events may become a normal page
828  *      (not a reader page) in the ring buffer, and this page will be
829  *      rewritten by the events producer.
830  *   B) the page of the consumed events may become a page for splice_read,
831  *      and this page will be returned to the system.
832  *
833  * These primitives allow multiple processes to access different CPU ring
834  * buffers concurrently.
835  *
836  * These primitives don't distinguish read-only from read-consume access.
837  * Multiple read-only accesses are also serialized.
838  */
839
840 #ifdef CONFIG_SMP
841 static DECLARE_RWSEM(all_cpu_access_lock);
842 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
843
844 static inline void trace_access_lock(int cpu)
845 {
846         if (cpu == RING_BUFFER_ALL_CPUS) {
847                 /* gain it for accessing the whole ring buffer. */
848                 down_write(&all_cpu_access_lock);
849         } else {
850                 /* gain it for accessing a cpu ring buffer. */
851
852                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
853                 down_read(&all_cpu_access_lock);
854
855                 /* Secondly block other access to this @cpu ring buffer. */
856                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
857         }
858 }
859
860 static inline void trace_access_unlock(int cpu)
861 {
862         if (cpu == RING_BUFFER_ALL_CPUS) {
863                 up_write(&all_cpu_access_lock);
864         } else {
865                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
866                 up_read(&all_cpu_access_lock);
867         }
868 }
869
870 static inline void trace_access_lock_init(void)
871 {
872         int cpu;
873
874         for_each_possible_cpu(cpu)
875                 mutex_init(&per_cpu(cpu_access_lock, cpu));
876 }
877
878 #else
879
880 static DEFINE_MUTEX(access_lock);
881
882 static inline void trace_access_lock(int cpu)
883 {
884         (void)cpu;
885         mutex_lock(&access_lock);
886 }
887
888 static inline void trace_access_unlock(int cpu)
889 {
890         (void)cpu;
891         mutex_unlock(&access_lock);
892 }
893
894 static inline void trace_access_lock_init(void)
895 {
896 }
897
898 #endif
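/*
 * Example (editorial sketch, not part of the original source): consuming
 * readers later in this file bracket their per-cpu ring buffer accesses
 * with these primitives. A simplified illustration with a hypothetical
 * helper name:
 */
static void __maybe_unused example_peek_cpu(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);

	/* events peeked here stay valid until trace_access_unlock() */
	event = ring_buffer_peek(buffer, cpu, &ts, NULL);
	if (event)
		pr_debug("next event on cpu %d at %llu\n",
			 cpu, (unsigned long long)ts);

	trace_access_unlock(cpu);
}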
899
900 #ifdef CONFIG_STACKTRACE
901 static void __ftrace_trace_stack(struct trace_buffer *buffer,
902                                  unsigned int trace_ctx,
903                                  int skip, struct pt_regs *regs);
904 static inline void ftrace_trace_stack(struct trace_array *tr,
905                                       struct trace_buffer *buffer,
906                                       unsigned int trace_ctx,
907                                       int skip, struct pt_regs *regs);
908
909 #else
910 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                         unsigned int trace_ctx,
912                                         int skip, struct pt_regs *regs)
913 {
914 }
915 static inline void ftrace_trace_stack(struct trace_array *tr,
916                                       struct trace_buffer *buffer,
917                                       unsigned int trace_ctx,
918                                       int skip, struct pt_regs *regs)
919 {
920 }
921
922 #endif
923
924 static __always_inline void
925 trace_event_setup(struct ring_buffer_event *event,
926                   int type, unsigned int trace_ctx)
927 {
928         struct trace_entry *ent = ring_buffer_event_data(event);
929
930         tracing_generic_entry_update(ent, type, trace_ctx);
931 }
932
933 static __always_inline struct ring_buffer_event *
934 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
935                           int type,
936                           unsigned long len,
937                           unsigned int trace_ctx)
938 {
939         struct ring_buffer_event *event;
940
941         event = ring_buffer_lock_reserve(buffer, len);
942         if (event != NULL)
943                 trace_event_setup(event, type, trace_ctx);
944
945         return event;
946 }
947
948 void tracer_tracing_on(struct trace_array *tr)
949 {
950         if (tr->array_buffer.buffer)
951                 ring_buffer_record_on(tr->array_buffer.buffer);
952         /*
953          * This flag is looked at when buffers haven't been allocated
954          * yet, or by some tracers (like irqsoff), that just want to
955          * know if the ring buffer has been disabled, but it can handle
956          * races where it gets disabled while we still do a record.
957          * As the check is in the fast path of the tracers, it is more
958          * important to be fast than accurate.
959          */
960         tr->buffer_disabled = 0;
961         /* Make the flag seen by readers */
962         smp_wmb();
963 }
964
965 /**
966  * tracing_on - enable tracing buffers
967  *
968  * This function enables tracing buffers that may have been
969  * disabled with tracing_off.
970  */
971 void tracing_on(void)
972 {
973         tracer_tracing_on(&global_trace);
974 }
975 EXPORT_SYMBOL_GPL(tracing_on);
976
977
978 static __always_inline void
979 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
980 {
981         __this_cpu_write(trace_taskinfo_save, true);
982
983         /* If this is the temp buffer, we need to commit fully */
984         if (this_cpu_read(trace_buffered_event) == event) {
985                 /* Length is in event->array[0] */
986                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
987                 /* Release the temp buffer */
988                 this_cpu_dec(trace_buffered_event_cnt);
989         } else
990                 ring_buffer_unlock_commit(buffer, event);
991 }
992
993 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
994                        const char *str, int size)
995 {
996         struct ring_buffer_event *event;
997         struct trace_buffer *buffer;
998         struct print_entry *entry;
999         unsigned int trace_ctx;
1000         int alloc;
1001
1002         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1003                 return 0;
1004
1005         if (unlikely(tracing_selftest_running || tracing_disabled))
1006                 return 0;
1007
1008         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1009
1010         trace_ctx = tracing_gen_ctx();
1011         buffer = tr->array_buffer.buffer;
1012         ring_buffer_nest_start(buffer);
1013         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1014                                             trace_ctx);
1015         if (!event) {
1016                 size = 0;
1017                 goto out;
1018         }
1019
1020         entry = ring_buffer_event_data(event);
1021         entry->ip = ip;
1022
1023         memcpy(&entry->buf, str, size);
1024
1025         /* Add a newline if necessary */
1026         if (entry->buf[size - 1] != '\n') {
1027                 entry->buf[size] = '\n';
1028                 entry->buf[size + 1] = '\0';
1029         } else
1030                 entry->buf[size] = '\0';
1031
1032         __buffer_unlock_commit(buffer, event);
1033         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1034  out:
1035         ring_buffer_nest_end(buffer);
1036         return size;
1037 }
1038 EXPORT_SYMBOL_GPL(__trace_array_puts);
1039
1040 /**
1041  * __trace_puts - write a constant string into the trace buffer.
1042  * @ip:    The address of the caller
1043  * @str:   The constant string to write
1044  * @size:  The size of the string.
1045  */
1046 int __trace_puts(unsigned long ip, const char *str, int size)
1047 {
1048         return __trace_array_puts(&global_trace, ip, str, size);
1049 }
1050 EXPORT_SYMBOL_GPL(__trace_puts);
1051
1052 /**
1053  * __trace_bputs - write the pointer to a constant string into trace buffer
1054  * @ip:    The address of the caller
1055  * @str:   The constant string to write to the buffer to
1056  */
1057 int __trace_bputs(unsigned long ip, const char *str)
1058 {
1059         struct ring_buffer_event *event;
1060         struct trace_buffer *buffer;
1061         struct bputs_entry *entry;
1062         unsigned int trace_ctx;
1063         int size = sizeof(struct bputs_entry);
1064         int ret = 0;
1065
1066         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1067                 return 0;
1068
1069         if (unlikely(tracing_selftest_running || tracing_disabled))
1070                 return 0;
1071
1072         trace_ctx = tracing_gen_ctx();
1073         buffer = global_trace.array_buffer.buffer;
1074
1075         ring_buffer_nest_start(buffer);
1076         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1077                                             trace_ctx);
1078         if (!event)
1079                 goto out;
1080
1081         entry = ring_buffer_event_data(event);
1082         entry->ip                       = ip;
1083         entry->str                      = str;
1084
1085         __buffer_unlock_commit(buffer, event);
1086         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1087
1088         ret = 1;
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return ret;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_bputs);
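/*
 * Example (editorial sketch, not part of the original source): callers
 * normally do not use the two functions above directly but go through
 * the trace_puts() macro, which selects __trace_bputs() for build-time
 * constant strings and __trace_puts() otherwise. A hypothetical
 * debugging helper:
 */
static void __maybe_unused example_mark_fast_path(int err)
{
	/* constant string: only its pointer ends up in the ring buffer */
	trace_puts("entering example fast path\n");

	if (err)
		/* formatted output goes through trace_printk() instead */
		trace_printk("example fast path failed: %d\n", err);
}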
1094
1095 #ifdef CONFIG_TRACER_SNAPSHOT
1096 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1097                                            void *cond_data)
1098 {
1099         struct tracer *tracer = tr->current_trace;
1100         unsigned long flags;
1101
1102         if (in_nmi()) {
1103                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1104                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1105                 return;
1106         }
1107
1108         if (!tr->allocated_snapshot) {
1109                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1110                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1111                 tracer_tracing_off(tr);
1112                 return;
1113         }
1114
1115         /* Note, snapshot can not be used when the tracer uses it */
1116         if (tracer->use_max_tr) {
1117                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1118                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1119                 return;
1120         }
1121
1122         local_irq_save(flags);
1123         update_max_tr(tr, current, smp_processor_id(), cond_data);
1124         local_irq_restore(flags);
1125 }
1126
1127 void tracing_snapshot_instance(struct trace_array *tr)
1128 {
1129         tracing_snapshot_instance_cond(tr, NULL);
1130 }
1131
1132 /**
1133  * tracing_snapshot - take a snapshot of the current buffer.
1134  *
1135  * This causes a swap between the snapshot buffer and the current live
1136  * tracing buffer. You can use this to take snapshots of the live
1137  * trace when some condition is triggered, but continue to trace.
1138  *
1139  * Note, make sure to allocate the snapshot either with
1140  * tracing_snapshot_alloc(), or manually with:
1141  *   echo 1 > /sys/kernel/debug/tracing/snapshot
1142  *
1143  * If the snapshot buffer is not allocated, this will stop tracing,
1144  * basically making a permanent snapshot.
1145  */
1146 void tracing_snapshot(void)
1147 {
1148         struct trace_array *tr = &global_trace;
1149
1150         tracing_snapshot_instance(tr);
1151 }
1152 EXPORT_SYMBOL_GPL(tracing_snapshot);
1153
1154 /**
1155  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1156  * @tr:         The tracing instance to snapshot
1157  * @cond_data:  The data to be tested conditionally, and possibly saved
1158  *
1159  * This is the same as tracing_snapshot() except that the snapshot is
1160  * conditional - the snapshot will only happen if the
1161  * cond_snapshot.update() implementation receiving the cond_data
1162  * returns true, which means that the trace array's cond_snapshot
1163  * update() operation used the cond_data to determine whether the
1164  * snapshot should be taken, and if it was, presumably saved it along
1165  * with the snapshot.
1166  */
1167 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1168 {
1169         tracing_snapshot_instance_cond(tr, cond_data);
1170 }
1171 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1172
1173 /**
1174  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1175  * @tr:         The tracing instance
1176  *
1177  * When the user enables a conditional snapshot using
1178  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1179  * with the snapshot.  This accessor is used to retrieve it.
1180  *
1181  * Should not be called from cond_snapshot.update(), since it takes
1182  * the tr->max_lock lock, which the code calling
1183  * cond_snapshot.update() has already taken.
1184  *
1185  * Returns the cond_data associated with the trace array's snapshot.
1186  */
1187 void *tracing_cond_snapshot_data(struct trace_array *tr)
1188 {
1189         void *cond_data = NULL;
1190
1191         local_irq_disable();
1192         arch_spin_lock(&tr->max_lock);
1193
1194         if (tr->cond_snapshot)
1195                 cond_data = tr->cond_snapshot->cond_data;
1196
1197         arch_spin_unlock(&tr->max_lock);
1198         local_irq_enable();
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want to preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
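/*
 * Example (editorial sketch, not part of the original source): a typical
 * use of the snapshot API documented above. The function and the error
 * condition are hypothetical; allocation would normally happen once at
 * init time since it may sleep.
 */
static void __maybe_unused example_capture_on_error(int err)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	if (err)
		/* freeze the interesting trace data in the spare buffer */
		tracing_snapshot();
}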
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         local_irq_disable();
1335         arch_spin_lock(&tr->max_lock);
1336         tr->cond_snapshot = cond_snapshot;
1337         arch_spin_unlock(&tr->max_lock);
1338         local_irq_enable();
1339
1340         mutex_unlock(&trace_types_lock);
1341
1342         return ret;
1343
1344  fail_unlock:
1345         mutex_unlock(&trace_types_lock);
1346         kfree(cond_snapshot);
1347         return ret;
1348 }
1349 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1350
1351 /**
1352  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1353  * @tr:         The tracing instance
1354  *
1355  * Check whether the conditional snapshot for the given instance is
1356  * enabled; if so, free the cond_snapshot associated with it,
1357  * otherwise return -EINVAL.
1358  *
1359  * Returns 0 if successful, error otherwise.
1360  */
1361 int tracing_snapshot_cond_disable(struct trace_array *tr)
1362 {
1363         int ret = 0;
1364
1365         local_irq_disable();
1366         arch_spin_lock(&tr->max_lock);
1367
1368         if (!tr->cond_snapshot)
1369                 ret = -EINVAL;
1370         else {
1371                 kfree(tr->cond_snapshot);
1372                 tr->cond_snapshot = NULL;
1373         }
1374
1375         arch_spin_unlock(&tr->max_lock);
1376         local_irq_enable();
1377
1378         return ret;
1379 }
1380 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
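/*
 * Example (editorial sketch, not part of the original source): wiring up
 * a conditional snapshot. The update callback decides, based on the
 * cond_data later passed to tracing_snapshot_cond(), whether the
 * snapshot is actually taken. All names and the threshold below are
 * hypothetical.
 */
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *latency = cond_data;

	/* only snapshot latencies above a made-up threshold (in ns) */
	return *latency > 1000000UL;
}

static int __maybe_unused example_enable_cond_snapshot(struct trace_array *tr)
{
	/* no private data is associated with the snapshot here */
	return tracing_snapshot_cond_enable(tr, NULL, example_cond_update);
}

/*
 * A measurement site would then call tracing_snapshot_cond(tr, &latency)
 * whenever it has a new value to test.
 */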
1381 #else
1382 void tracing_snapshot(void)
1383 {
1384         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1385 }
1386 EXPORT_SYMBOL_GPL(tracing_snapshot);
1387 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1392 int tracing_alloc_snapshot(void)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1395         return -ENODEV;
1396 }
1397 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1398 void tracing_snapshot_alloc(void)
1399 {
1400         /* Give warning */
1401         tracing_snapshot();
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1404 void *tracing_cond_snapshot_data(struct trace_array *tr)
1405 {
1406         return NULL;
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1409 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1410 {
1411         return -ENODEV;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1414 int tracing_snapshot_cond_disable(struct trace_array *tr)
1415 {
1416         return false;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1419 #define free_snapshot(tr)       do { } while (0)
1420 #endif /* CONFIG_TRACER_SNAPSHOT */
1421
1422 void tracer_tracing_off(struct trace_array *tr)
1423 {
1424         if (tr->array_buffer.buffer)
1425                 ring_buffer_record_off(tr->array_buffer.buffer);
1426         /*
1427          * This flag is looked at when buffers haven't been allocated
1428          * yet, or by some tracers (like irqsoff), that just want to
1429          * know if the ring buffer has been disabled, but it can handle
1430          * races where it gets disabled while we still do a record.
1431          * As the check is in the fast path of the tracers, it is more
1432          * important to be fast than accurate.
1433          */
1434         tr->buffer_disabled = 1;
1435         /* Make the flag seen by readers */
1436         smp_wmb();
1437 }
1438
1439 /**
1440  * tracing_off - turn off tracing buffers
1441  *
1442  * This function stops the tracing buffers from recording data.
1443  * It does not disable any overhead the tracers themselves may
1444  * be causing. This function simply causes all recording to
1445  * the ring buffers to fail.
1446  */
1447 void tracing_off(void)
1448 {
1449         tracer_tracing_off(&global_trace);
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_off);
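/*
 * Example (editorial sketch, not part of the original source): a common
 * debugging pattern is to let the trace run freely and freeze it the
 * moment a problem is detected, so the events leading up to the failure
 * stay in the ring buffer. The function and condition are hypothetical.
 */
static void __maybe_unused example_check_state(bool corrupted)
{
	if (!corrupted)
		return;

	trace_printk("state corruption detected, freezing trace\n");
	tracing_off();
}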
1452
1453 void disable_trace_on_warning(void)
1454 {
1455         if (__disable_trace_on_warning) {
1456                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1457                         "Disabling tracing due to warning\n");
1458                 tracing_off();
1459         }
1460 }
1461
1462 /**
1463  * tracer_tracing_is_on - show real state of ring buffer enabled
1464  * @tr : the trace array to know if ring buffer is enabled
1465  *
1466  * Shows real state of the ring buffer if it is enabled or not.
1467  */
1468 bool tracer_tracing_is_on(struct trace_array *tr)
1469 {
1470         if (tr->array_buffer.buffer)
1471                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1472         return !tr->buffer_disabled;
1473 }
1474
1475 /**
1476  * tracing_is_on - show state of ring buffers enabled
1477  */
1478 int tracing_is_on(void)
1479 {
1480         return tracer_tracing_is_on(&global_trace);
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_is_on);
1483
1484 static int __init set_buf_size(char *str)
1485 {
1486         unsigned long buf_size;
1487
1488         if (!str)
1489                 return 0;
1490         buf_size = memparse(str, &str);
1491         /*
1492          * nr_entries can not be zero and the startup
1493          * tests require some buffer space. Therefore
1494          * ensure we have at least 4096 bytes of buffer.
1495          */
1496         trace_buf_size = max(4096UL, buf_size);
1497         return 1;
1498 }
1499 __setup("trace_buf_size=", set_buf_size);
1500
1501 static int __init set_tracing_thresh(char *str)
1502 {
1503         unsigned long threshold;
1504         int ret;
1505
1506         if (!str)
1507                 return 0;
1508         ret = kstrtoul(str, 0, &threshold);
1509         if (ret < 0)
1510                 return 0;
1511         tracing_thresh = threshold * 1000;
1512         return 1;
1513 }
1514 __setup("tracing_thresh=", set_tracing_thresh);
1515
1516 unsigned long nsecs_to_usecs(unsigned long nsecs)
1517 {
1518         return nsecs / 1000;
1519 }
1520
1521 /*
1522  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1523  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1524  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1525  * of strings in the order that the evals (enum) were defined.
1526  */
1527 #undef C
1528 #define C(a, b) b
1529
1530 /* These must match the bit positions in trace_iterator_flags */
1531 static const char *trace_options[] = {
1532         TRACE_FLAGS
1533         NULL
1534 };
1535
1536 static struct {
1537         u64 (*func)(void);
1538         const char *name;
1539         int in_ns;              /* is this clock in nanoseconds? */
1540 } trace_clocks[] = {
1541         { trace_clock_local,            "local",        1 },
1542         { trace_clock_global,           "global",       1 },
1543         { trace_clock_counter,          "counter",      0 },
1544         { trace_clock_jiffies,          "uptime",       0 },
1545         { trace_clock,                  "perf",         1 },
1546         { ktime_get_mono_fast_ns,       "mono",         1 },
1547         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1548         { ktime_get_boot_fast_ns,       "boot",         1 },
1549         ARCH_TRACE_CLOCKS
1550 };
1551
1552 bool trace_clock_in_ns(struct trace_array *tr)
1553 {
1554         if (trace_clocks[tr->clock_id].in_ns)
1555                 return true;
1556
1557         return false;
1558 }
1559
1560 /*
1561  * trace_parser_get_init - gets the buffer for trace parser
1562  */
1563 int trace_parser_get_init(struct trace_parser *parser, int size)
1564 {
1565         memset(parser, 0, sizeof(*parser));
1566
1567         parser->buffer = kmalloc(size, GFP_KERNEL);
1568         if (!parser->buffer)
1569                 return 1;
1570
1571         parser->size = size;
1572         return 0;
1573 }
1574
1575 /*
1576  * trace_parser_put - frees the buffer for trace parser
1577  */
1578 void trace_parser_put(struct trace_parser *parser)
1579 {
1580         kfree(parser->buffer);
1581         parser->buffer = NULL;
1582 }
1583
1584 /*
1585  * trace_get_user - reads the user input string separated by space
1586  * (matched by isspace(ch))
1587  *
1588  * For each string found the 'struct trace_parser' is updated,
1589  * and the function returns.
1590  *
1591  * Returns number of bytes read.
1592  *
1593  * See kernel/trace/trace.h for 'struct trace_parser' details.
1594  */
1595 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1596         size_t cnt, loff_t *ppos)
1597 {
1598         char ch;
1599         size_t read = 0;
1600         ssize_t ret;
1601
1602         if (!*ppos)
1603                 trace_parser_clear(parser);
1604
1605         ret = get_user(ch, ubuf++);
1606         if (ret)
1607                 goto out;
1608
1609         read++;
1610         cnt--;
1611
1612         /*
1613          * The parser is not finished with the last write,
1614          * continue reading the user input without skipping spaces.
1615          */
1616         if (!parser->cont) {
1617                 /* skip white space */
1618                 while (cnt && isspace(ch)) {
1619                         ret = get_user(ch, ubuf++);
1620                         if (ret)
1621                                 goto out;
1622                         read++;
1623                         cnt--;
1624                 }
1625
1626                 parser->idx = 0;
1627
1628                 /* only spaces were written */
1629                 if (isspace(ch) || !ch) {
1630                         *ppos += read;
1631                         ret = read;
1632                         goto out;
1633                 }
1634         }
1635
1636         /* read the non-space input */
1637         while (cnt && !isspace(ch) && ch) {
1638                 if (parser->idx < parser->size - 1)
1639                         parser->buffer[parser->idx++] = ch;
1640                 else {
1641                         ret = -EINVAL;
1642                         goto out;
1643                 }
1644                 ret = get_user(ch, ubuf++);
1645                 if (ret)
1646                         goto out;
1647                 read++;
1648                 cnt--;
1649         }
1650
1651         /* We either got finished input or we have to wait for another call. */
1652         if (isspace(ch) || !ch) {
1653                 parser->buffer[parser->idx] = 0;
1654                 parser->cont = false;
1655         } else if (parser->idx < parser->size - 1) {
1656                 parser->cont = true;
1657                 parser->buffer[parser->idx++] = ch;
1658                 /* Make sure the parsed string always terminates with '\0'. */
1659                 parser->buffer[parser->idx] = 0;
1660         } else {
1661                 ret = -EINVAL;
1662                 goto out;
1663         }
1664
1665         *ppos += read;
1666         ret = read;
1667
1668 out:
1669         return ret;
1670 }
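
/*
 * Example (sketch): how a tracefs write handler typically drives the parser
 * above. The buffer size and the process_token() helper are placeholders for
 * illustration only; real callers (e.g. the ftrace filter write paths) do
 * their own per-token handling.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read, ret;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser) &&
 *		    !trace_parser_cont(&parser)) {
 *			ret = process_token(parser.buffer);	// hypothetical helper
 *			if (ret < 0)
 *				read = ret;
 *		}
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */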
1671
1672 /* TODO add a seq_buf_to_buffer() */
1673 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1674 {
1675         int len;
1676
1677         if (trace_seq_used(s) <= s->seq.readpos)
1678                 return -EBUSY;
1679
1680         len = trace_seq_used(s) - s->seq.readpos;
1681         if (cnt > len)
1682                 cnt = len;
1683         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1684
1685         s->seq.readpos += cnt;
1686         return cnt;
1687 }
1688
1689 unsigned long __read_mostly     tracing_thresh;
1690
1691 #ifdef CONFIG_TRACER_MAX_TRACE
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency",
1718                                               TRACE_MODE_WRITE,
1719                                               d_tracer, tr,
1720                                               &tracing_max_lat_fops);
1721 }
1722
1723 __init static int latency_fsnotify_init(void)
1724 {
1725         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1726                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1727         if (!fsnotify_wq) {
1728                 pr_err("Unable to allocate tr_max_lat_wq\n");
1729                 return -ENOMEM;
1730         }
1731         return 0;
1732 }
1733
1734 late_initcall_sync(latency_fsnotify_init);
1735
1736 void latency_fsnotify(struct trace_array *tr)
1737 {
1738         if (!fsnotify_wq)
1739                 return;
1740         /*
1741          * We cannot call queue_work(fsnotify_wq, &tr->fsnotify_work) from
1742          * here because it's possible that we are called from __schedule()
1743          * or do_idle(), which could cause a deadlock.
1744          */
1745         irq_work_queue(&tr->fsnotify_irqwork);
1746 }
1747
1748 #else /* !LATENCY_FS_NOTIFY */
1749
1750 #define trace_create_maxlat_file(tr, d_tracer)                          \
1751         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1752                           d_tracer, tr, &tracing_max_lat_fops)
1753
1754 #endif
1755
1756 /*
1757  * Copy the new maximum trace into the separate maximum-trace
1758  * structure. (this way the maximum trace is permanently saved,
1759  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1760  */
1761 static void
1762 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1763 {
1764         struct array_buffer *trace_buf = &tr->array_buffer;
1765         struct array_buffer *max_buf = &tr->max_buffer;
1766         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1767         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1768
1769         max_buf->cpu = cpu;
1770         max_buf->time_start = data->preempt_timestamp;
1771
1772         max_data->saved_latency = tr->max_latency;
1773         max_data->critical_start = data->critical_start;
1774         max_data->critical_end = data->critical_end;
1775
1776         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1777         max_data->pid = tsk->pid;
1778         /*
1779          * If tsk == current, then use current_uid(), as that does not use
1780          * RCU. The irq tracer can be called out of RCU scope.
1781          */
1782         if (tsk == current)
1783                 max_data->uid = current_uid();
1784         else
1785                 max_data->uid = task_uid(tsk);
1786
1787         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1788         max_data->policy = tsk->policy;
1789         max_data->rt_priority = tsk->rt_priority;
1790
1791         /* record this task's comm */
1792         tracing_record_cmdline(tsk);
1793         latency_fsnotify(tr);
1794 }
1795
1796 /**
1797  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1798  * @tr: the trace array holding the buffers to snapshot
1799  * @tsk: the task with the latency
1800  * @cpu: The cpu that initiated the trace.
1801  * @cond_data: User data associated with a conditional snapshot
1802  *
1803  * Flip the buffers between the @tr and the max_tr and record information
1804  * about which task was the cause of this latency.
1805  */
1806 void
1807 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1808               void *cond_data)
1809 {
1810         if (tr->stop_count)
1811                 return;
1812
1813         WARN_ON_ONCE(!irqs_disabled());
1814
1815         if (!tr->allocated_snapshot) {
1816                 /* Only the nop tracer should hit this when disabling */
1817                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1818                 return;
1819         }
1820
1821         arch_spin_lock(&tr->max_lock);
1822
1823         /* Inherit the recordable setting from array_buffer */
1824         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1825                 ring_buffer_record_on(tr->max_buffer.buffer);
1826         else
1827                 ring_buffer_record_off(tr->max_buffer.buffer);
1828
1829 #ifdef CONFIG_TRACER_SNAPSHOT
1830         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1831                 arch_spin_unlock(&tr->max_lock);
1832                 return;
1833         }
1834 #endif
1835         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1836
1837         __update_max_tr(tr, tsk, cpu);
1838
1839         arch_spin_unlock(&tr->max_lock);
1840 }
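
/*
 * Example (sketch): a latency tracer records a new maximum roughly like this,
 * with interrupts already disabled. The threshold test here is a simplified
 * stand-in for each tracer's own report_latency()-style check:
 *
 *	if (delta > tr->max_latency && !is_tracing_stopped()) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */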
1841
1842 /**
1843  * update_max_tr_single - only copy one trace over, and reset the rest
1844  * @tr: the trace array holding the buffers to swap
1845  * @tsk: task with the latency
1846  * @cpu: the cpu of the buffer to copy.
1847  *
1848  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1849  */
1850 void
1851 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852 {
1853         int ret;
1854
1855         if (tr->stop_count)
1856                 return;
1857
1858         WARN_ON_ONCE(!irqs_disabled());
1859         if (!tr->allocated_snapshot) {
1860                 /* Only the nop tracer should hit this when disabling */
1861                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1862                 return;
1863         }
1864
1865         arch_spin_lock(&tr->max_lock);
1866
1867         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1868
1869         if (ret == -EBUSY) {
1870                 /*
1871                  * We failed to swap the buffer because either a commit is
1872                  * taking place on this CPU or a buffer resize is in
1873                  * progress. We fail to record, but we reset the max trace
1874                  * buffer (no one writes directly to it) and flag that the
1875                  * swap failed.
1876                  */
1877                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1878                         "Failed to swap buffers due to commit or resize in progress\n");
1879         }
1880
1881         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1882
1883         __update_max_tr(tr, tsk, cpu);
1884         arch_spin_unlock(&tr->max_lock);
1885 }
1886
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891         /* Iterators are static; they should be filled or empty */
1892         if (trace_buffer_iter(iter, iter->cpu_file))
1893                 return 0;
1894
1895         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896                                 full);
1897 }
1898
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901
1902 struct trace_selftests {
1903         struct list_head                list;
1904         struct tracer                   *type;
1905 };
1906
1907 static LIST_HEAD(postponed_selftests);
1908
1909 static int save_selftest(struct tracer *type)
1910 {
1911         struct trace_selftests *selftest;
1912
1913         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914         if (!selftest)
1915                 return -ENOMEM;
1916
1917         selftest->type = type;
1918         list_add(&selftest->list, &postponed_selftests);
1919         return 0;
1920 }
1921
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924         struct trace_array *tr = &global_trace;
1925         struct tracer *saved_tracer = tr->current_trace;
1926         int ret;
1927
1928         if (!type->selftest || tracing_selftest_disabled)
1929                 return 0;
1930
1931         /*
1932          * If a tracer registers early in boot up (before scheduling is
1933          * initialized and such), then do not run its selftests yet.
1934          * Instead, run it a little later in the boot process.
1935          */
1936         if (!selftests_can_run)
1937                 return save_selftest(type);
1938
1939         if (!tracing_is_on()) {
1940                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941                         type->name);
1942                 return 0;
1943         }
1944
1945         /*
1946          * Run a selftest on this tracer.
1947          * Here we reset the trace buffer, and set the current
1948          * tracer to be this tracer. The tracer can then run some
1949          * internal tracing to verify that everything is in order.
1950          * If we fail, we do not register this tracer.
1951          */
1952         tracing_reset_online_cpus(&tr->array_buffer);
1953
1954         tr->current_trace = type;
1955
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957         if (type->use_max_tr) {
1958                 /* If we expanded the buffers, make sure the max is expanded too */
1959                 if (ring_buffer_expanded)
1960                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961                                            RING_BUFFER_ALL_CPUS);
1962                 tr->allocated_snapshot = true;
1963         }
1964 #endif
1965
1966         /* the test is responsible for initializing and enabling */
1967         pr_info("Testing tracer %s: ", type->name);
1968         ret = type->selftest(type, tr);
1969         /* the test is responsible for resetting too */
1970         tr->current_trace = saved_tracer;
1971         if (ret) {
1972                 printk(KERN_CONT "FAILED!\n");
1973                 /* Add the warning after printing 'FAILED' */
1974                 WARN_ON(1);
1975                 return -1;
1976         }
1977         /* Only reset on passing, to avoid touching corrupted buffers */
1978         tracing_reset_online_cpus(&tr->array_buffer);
1979
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981         if (type->use_max_tr) {
1982                 tr->allocated_snapshot = false;
1983
1984                 /* Shrink the max buffer again */
1985                 if (ring_buffer_expanded)
1986                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1987                                            RING_BUFFER_ALL_CPUS);
1988         }
1989 #endif
1990
1991         printk(KERN_CONT "PASSED\n");
1992         return 0;
1993 }
1994
1995 static __init int init_trace_selftests(void)
1996 {
1997         struct trace_selftests *p, *n;
1998         struct tracer *t, **last;
1999         int ret;
2000
2001         selftests_can_run = true;
2002
2003         mutex_lock(&trace_types_lock);
2004
2005         if (list_empty(&postponed_selftests))
2006                 goto out;
2007
2008         pr_info("Running postponed tracer tests:\n");
2009
2010         tracing_selftest_running = true;
2011         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012                 /* This loop can take minutes when sanitizers are enabled, so
2013                  * let's make sure we allow RCU processing.
2014                  */
2015                 cond_resched();
2016                 ret = run_tracer_selftest(p->type);
2017                 /* If the test fails, then warn and remove from available_tracers */
2018                 if (ret < 0) {
2019                         WARN(1, "tracer: %s failed selftest, disabling\n",
2020                              p->type->name);
2021                         last = &trace_types;
2022                         for (t = trace_types; t; t = t->next) {
2023                                 if (t == p->type) {
2024                                         *last = t->next;
2025                                         break;
2026                                 }
2027                                 last = &t->next;
2028                         }
2029                 }
2030                 list_del(&p->list);
2031                 kfree(p);
2032         }
2033         tracing_selftest_running = false;
2034
2035  out:
2036         mutex_unlock(&trace_types_lock);
2037
2038         return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044         return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049
2050 static void __init apply_trace_boot_options(void);
2051
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060         struct tracer *t;
2061         int ret = 0;
2062
2063         if (!type->name) {
2064                 pr_info("Tracer must have a name\n");
2065                 return -1;
2066         }
2067
2068         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070                 return -1;
2071         }
2072
2073         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074                 pr_warn("Can not register tracer %s due to lockdown\n",
2075                            type->name);
2076                 return -EPERM;
2077         }
2078
2079         mutex_lock(&trace_types_lock);
2080
2081         tracing_selftest_running = true;
2082
2083         for (t = trace_types; t; t = t->next) {
2084                 if (strcmp(type->name, t->name) == 0) {
2085                         /* already found */
2086                         pr_info("Tracer %s already registered\n",
2087                                 type->name);
2088                         ret = -1;
2089                         goto out;
2090                 }
2091         }
2092
2093         if (!type->set_flag)
2094                 type->set_flag = &dummy_set_flag;
2095         if (!type->flags) {
2096                 /* Allocate a dummy tracer_flags */
2097                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098                 if (!type->flags) {
2099                         ret = -ENOMEM;
2100                         goto out;
2101                 }
2102                 type->flags->val = 0;
2103                 type->flags->opts = dummy_tracer_opt;
2104         } else
2105                 if (!type->flags->opts)
2106                         type->flags->opts = dummy_tracer_opt;
2107
2108         /* store the tracer for __set_tracer_option */
2109         type->flags->trace = type;
2110
2111         ret = run_tracer_selftest(type);
2112         if (ret < 0)
2113                 goto out;
2114
2115         type->next = trace_types;
2116         trace_types = type;
2117         add_tracer_options(&global_trace, type);
2118
2119  out:
2120         tracing_selftest_running = false;
2121         mutex_unlock(&trace_types_lock);
2122
2123         if (ret || !default_bootup_tracer)
2124                 goto out_unlock;
2125
2126         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127                 goto out_unlock;
2128
2129         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130         /* Do we want this tracer to start on bootup? */
2131         tracing_set_tracer(&global_trace, type->name);
2132         default_bootup_tracer = NULL;
2133
2134         apply_trace_boot_options();
2135
2136         /* Disable other selftests, since running this tracer will interfere with them. */
2137         disable_tracing_selftest("running a tracer");
2138
2139  out_unlock:
2140         return ret;
2141 }
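
/*
 * Example (sketch): the usual shape of a tracer registration. The field and
 * callback names follow struct tracer in trace.h; "example" is a made-up
 * tracer used purely for illustration:
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		// start whatever probes/hooks this tracer needs
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *		// undo example_tracer_init()
 *	}
 *
 *	static struct tracer example_tracer = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */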
2142
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145         struct trace_buffer *buffer = buf->buffer;
2146
2147         if (!buffer)
2148                 return;
2149
2150         ring_buffer_record_disable(buffer);
2151
2152         /* Make sure all commits have finished */
2153         synchronize_rcu();
2154         ring_buffer_reset_cpu(buffer, cpu);
2155
2156         ring_buffer_record_enable(buffer);
2157 }
2158
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161         struct trace_buffer *buffer = buf->buffer;
2162
2163         if (!buffer)
2164                 return;
2165
2166         ring_buffer_record_disable(buffer);
2167
2168         /* Make sure all commits have finished */
2169         synchronize_rcu();
2170
2171         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172
2173         ring_buffer_reset_online_cpus(buffer);
2174
2175         ring_buffer_record_enable(buffer);
2176 }
2177
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus_unlocked(void)
2180 {
2181         struct trace_array *tr;
2182
2183         lockdep_assert_held(&trace_types_lock);
2184
2185         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2186                 if (!tr->clear_trace)
2187                         continue;
2188                 tr->clear_trace = false;
2189                 tracing_reset_online_cpus(&tr->array_buffer);
2190 #ifdef CONFIG_TRACER_MAX_TRACE
2191                 tracing_reset_online_cpus(&tr->max_buffer);
2192 #endif
2193         }
2194 }
2195
2196 void tracing_reset_all_online_cpus(void)
2197 {
2198         mutex_lock(&trace_types_lock);
2199         tracing_reset_all_online_cpus_unlocked();
2200         mutex_unlock(&trace_types_lock);
2201 }
2202
2203 /*
2204  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2205  * is the tgid last observed corresponding to pid=i.
2206  */
2207 static int *tgid_map;
2208
2209 /* The maximum valid index into tgid_map. */
2210 static size_t tgid_map_max;
2211
2212 #define SAVED_CMDLINES_DEFAULT 128
2213 #define NO_CMDLINE_MAP UINT_MAX
2214 /*
2215  * Preemption must be disabled before acquiring trace_cmdline_lock.
2216  * The various trace_arrays' max_lock must be acquired in a context
2217  * where interrupts are disabled.
2218  */
2219 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2220 struct saved_cmdlines_buffer {
2221         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2222         unsigned *map_cmdline_to_pid;
2223         unsigned cmdline_num;
2224         int cmdline_idx;
2225         char *saved_cmdlines;
2226 };
2227 static struct saved_cmdlines_buffer *savedcmd;
2228
2229 static inline char *get_saved_cmdlines(int idx)
2230 {
2231         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2232 }
2233
2234 static inline void set_cmdline(int idx, const char *cmdline)
2235 {
2236         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2237 }
2238
2239 static int allocate_cmdlines_buffer(unsigned int val,
2240                                     struct saved_cmdlines_buffer *s)
2241 {
2242         s->map_cmdline_to_pid = kmalloc_array(val,
2243                                               sizeof(*s->map_cmdline_to_pid),
2244                                               GFP_KERNEL);
2245         if (!s->map_cmdline_to_pid)
2246                 return -ENOMEM;
2247
2248         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2249         if (!s->saved_cmdlines) {
2250                 kfree(s->map_cmdline_to_pid);
2251                 return -ENOMEM;
2252         }
2253
2254         s->cmdline_idx = 0;
2255         s->cmdline_num = val;
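        /*
         * NO_CMDLINE_MAP is UINT_MAX, so the byte-wise memset() below with
         * that value fills both maps with 0xff bytes, which makes every
         * entry read back as NO_CMDLINE_MAP.
         */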
2256         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2257                sizeof(s->map_pid_to_cmdline));
2258         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2259                val * sizeof(*s->map_cmdline_to_pid));
2260
2261         return 0;
2262 }
2263
2264 static int trace_create_savedcmd(void)
2265 {
2266         int ret;
2267
2268         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2269         if (!savedcmd)
2270                 return -ENOMEM;
2271
2272         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2273         if (ret < 0) {
2274                 kfree(savedcmd);
2275                 savedcmd = NULL;
2276                 return -ENOMEM;
2277         }
2278
2279         return 0;
2280 }
2281
2282 int is_tracing_stopped(void)
2283 {
2284         return global_trace.stop_count;
2285 }
2286
2287 /**
2288  * tracing_start - quick start of the tracer
2289  *
2290  * If tracing is enabled but was stopped by tracing_stop,
2291  * this will start the tracer back up.
2292  */
2293 void tracing_start(void)
2294 {
2295         struct trace_buffer *buffer;
2296         unsigned long flags;
2297
2298         if (tracing_disabled)
2299                 return;
2300
2301         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2302         if (--global_trace.stop_count) {
2303                 if (global_trace.stop_count < 0) {
2304                         /* Someone screwed up their debugging */
2305                         WARN_ON_ONCE(1);
2306                         global_trace.stop_count = 0;
2307                 }
2308                 goto out;
2309         }
2310
2311         /* Prevent the buffers from switching */
2312         arch_spin_lock(&global_trace.max_lock);
2313
2314         buffer = global_trace.array_buffer.buffer;
2315         if (buffer)
2316                 ring_buffer_record_enable(buffer);
2317
2318 #ifdef CONFIG_TRACER_MAX_TRACE
2319         buffer = global_trace.max_buffer.buffer;
2320         if (buffer)
2321                 ring_buffer_record_enable(buffer);
2322 #endif
2323
2324         arch_spin_unlock(&global_trace.max_lock);
2325
2326  out:
2327         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2328 }
2329
2330 static void tracing_start_tr(struct trace_array *tr)
2331 {
2332         struct trace_buffer *buffer;
2333         unsigned long flags;
2334
2335         if (tracing_disabled)
2336                 return;
2337
2338         /* If global, we need to also start the max tracer */
2339         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2340                 return tracing_start();
2341
2342         raw_spin_lock_irqsave(&tr->start_lock, flags);
2343
2344         if (--tr->stop_count) {
2345                 if (tr->stop_count < 0) {
2346                         /* Someone screwed up their debugging */
2347                         WARN_ON_ONCE(1);
2348                         tr->stop_count = 0;
2349                 }
2350                 goto out;
2351         }
2352
2353         buffer = tr->array_buffer.buffer;
2354         if (buffer)
2355                 ring_buffer_record_enable(buffer);
2356
2357  out:
2358         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2359 }
2360
2361 /**
2362  * tracing_stop - quick stop of the tracer
2363  *
2364  * Light weight way to stop tracing. Use in conjunction with
2365  * tracing_start.
2366  */
2367 void tracing_stop(void)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2373         if (global_trace.stop_count++)
2374                 goto out;
2375
2376         /* Prevent the buffers from switching */
2377         arch_spin_lock(&global_trace.max_lock);
2378
2379         buffer = global_trace.array_buffer.buffer;
2380         if (buffer)
2381                 ring_buffer_record_disable(buffer);
2382
2383 #ifdef CONFIG_TRACER_MAX_TRACE
2384         buffer = global_trace.max_buffer.buffer;
2385         if (buffer)
2386                 ring_buffer_record_disable(buffer);
2387 #endif
2388
2389         arch_spin_unlock(&global_trace.max_lock);
2390
2391  out:
2392         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2393 }
2394
2395 static void tracing_stop_tr(struct trace_array *tr)
2396 {
2397         struct trace_buffer *buffer;
2398         unsigned long flags;
2399
2400         /* If global, we need to also stop the max tracer */
2401         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2402                 return tracing_stop();
2403
2404         raw_spin_lock_irqsave(&tr->start_lock, flags);
2405         if (tr->stop_count++)
2406                 goto out;
2407
2408         buffer = tr->array_buffer.buffer;
2409         if (buffer)
2410                 ring_buffer_record_disable(buffer);
2411
2412  out:
2413         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2414 }
2415
2416 static int trace_save_cmdline(struct task_struct *tsk)
2417 {
2418         unsigned tpid, idx;
2419
2420         /* treat recording of idle task as a success */
2421         if (!tsk->pid)
2422                 return 1;
2423
2424         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2425
2426         /*
2427          * It's not the end of the world if we don't get
2428          * the lock, but we also don't want to spin
2429          * nor do we want to disable interrupts,
2430          * so if we miss here, then better luck next time.
2431          *
2432          * This is called from within the scheduler and wakeup paths, so
2433          * interrupts had better be disabled and the run queue lock held.
2434          */
2435         lockdep_assert_preemption_disabled();
2436         if (!arch_spin_trylock(&trace_cmdline_lock))
2437                 return 0;
2438
2439         idx = savedcmd->map_pid_to_cmdline[tpid];
2440         if (idx == NO_CMDLINE_MAP) {
2441                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2442
2443                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2444                 savedcmd->cmdline_idx = idx;
2445         }
2446
2447         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2448         set_cmdline(idx, tsk->comm);
2449
2450         arch_spin_unlock(&trace_cmdline_lock);
2451
2452         return 1;
2453 }
2454
2455 static void __trace_find_cmdline(int pid, char comm[])
2456 {
2457         unsigned map;
2458         int tpid;
2459
2460         if (!pid) {
2461                 strcpy(comm, "<idle>");
2462                 return;
2463         }
2464
2465         if (WARN_ON_ONCE(pid < 0)) {
2466                 strcpy(comm, "<XXX>");
2467                 return;
2468         }
2469
2470         tpid = pid & (PID_MAX_DEFAULT - 1);
2471         map = savedcmd->map_pid_to_cmdline[tpid];
2472         if (map != NO_CMDLINE_MAP) {
2473                 tpid = savedcmd->map_cmdline_to_pid[map];
2474                 if (tpid == pid) {
2475                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2476                         return;
2477                 }
2478         }
2479         strcpy(comm, "<...>");
2480 }
2481
2482 void trace_find_cmdline(int pid, char comm[])
2483 {
2484         preempt_disable();
2485         arch_spin_lock(&trace_cmdline_lock);
2486
2487         __trace_find_cmdline(pid, comm);
2488
2489         arch_spin_unlock(&trace_cmdline_lock);
2490         preempt_enable();
2491 }
2492
2493 static int *trace_find_tgid_ptr(int pid)
2494 {
2495         /*
2496          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2497          * if we observe a non-NULL tgid_map then we also observe the correct
2498          * tgid_map_max.
2499          */
2500         int *map = smp_load_acquire(&tgid_map);
2501
2502         if (unlikely(!map || pid > tgid_map_max))
2503                 return NULL;
2504
2505         return &map[pid];
2506 }
2507
2508 int trace_find_tgid(int pid)
2509 {
2510         int *ptr = trace_find_tgid_ptr(pid);
2511
2512         return ptr ? *ptr : 0;
2513 }
2514
2515 static int trace_save_tgid(struct task_struct *tsk)
2516 {
2517         int *ptr;
2518
2519         /* treat recording of idle task as a success */
2520         if (!tsk->pid)
2521                 return 1;
2522
2523         ptr = trace_find_tgid_ptr(tsk->pid);
2524         if (!ptr)
2525                 return 0;
2526
2527         *ptr = tsk->tgid;
2528         return 1;
2529 }
2530
2531 static bool tracing_record_taskinfo_skip(int flags)
2532 {
2533         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2534                 return true;
2535         if (!__this_cpu_read(trace_taskinfo_save))
2536                 return true;
2537         return false;
2538 }
2539
2540 /**
2541  * tracing_record_taskinfo - record the task info of a task
2542  *
2543  * @task:  task to record
2544  * @flags: TRACE_RECORD_CMDLINE for recording comm
2545  *         TRACE_RECORD_TGID for recording tgid
2546  */
2547 void tracing_record_taskinfo(struct task_struct *task, int flags)
2548 {
2549         bool done;
2550
2551         if (tracing_record_taskinfo_skip(flags))
2552                 return;
2553
2554         /*
2555          * Record as much task information as possible. If some fail, continue
2556          * to try to record the others.
2557          */
2558         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2559         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2560
2561         /* If recording any information failed, retry again soon. */
2562         if (!done)
2563                 return;
2564
2565         __this_cpu_write(trace_taskinfo_save, false);
2566 }
2567
2568 /**
2569  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2570  *
2571  * @prev: previous task during sched_switch
2572  * @next: next task during sched_switch
2573  * @flags: TRACE_RECORD_CMDLINE for recording comm
2574  *         TRACE_RECORD_TGID for recording tgid
2575  */
2576 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2577                                           struct task_struct *next, int flags)
2578 {
2579         bool done;
2580
2581         if (tracing_record_taskinfo_skip(flags))
2582                 return;
2583
2584         /*
2585          * Record as much task information as possible. If some fail, continue
2586          * to try to record the others.
2587          */
2588         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2589         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2590         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2591         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2592
2593         /* If recording any information failed, retry again soon. */
2594         if (!done)
2595                 return;
2596
2597         __this_cpu_write(trace_taskinfo_save, false);
2598 }
2599
2600 /* Helpers to record a specific task information */
2601 void tracing_record_cmdline(struct task_struct *task)
2602 {
2603         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2604 }
2605
2606 void tracing_record_tgid(struct task_struct *task)
2607 {
2608         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2609 }
2610
2611 /*
2612  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2613  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2614  * simplifies those functions and keeps them in sync.
2615  */
2616 enum print_line_t trace_handle_return(struct trace_seq *s)
2617 {
2618         return trace_seq_has_overflowed(s) ?
2619                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2620 }
2621 EXPORT_SYMBOL_GPL(trace_handle_return);
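
/*
 * Example (sketch): a typical trace_event output callback ends with
 * trace_handle_return() so that an overflowed trace_seq is reported as a
 * partial line. The event name and field are illustrative only:
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example_field=%d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */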
2622
2623 static unsigned short migration_disable_value(void)
2624 {
2625 #if defined(CONFIG_SMP)
2626         return current->migration_disabled;
2627 #else
2628         return 0;
2629 #endif
2630 }
2631
2632 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2633 {
2634         unsigned int trace_flags = irqs_status;
2635         unsigned int pc;
2636
2637         pc = preempt_count();
2638
2639         if (pc & NMI_MASK)
2640                 trace_flags |= TRACE_FLAG_NMI;
2641         if (pc & HARDIRQ_MASK)
2642                 trace_flags |= TRACE_FLAG_HARDIRQ;
2643         if (in_serving_softirq())
2644                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2645
2646         if (tif_need_resched())
2647                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2648         if (test_preempt_need_resched())
2649                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
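        /*
         * Pack the context: bits 0-3 hold the preempt depth (clamped to 15),
         * bits 4-7 the migration-disable depth (also clamped to 15), and
         * bits 16 and up the irq/softirq/NMI/resched flags computed above.
         */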
2650         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2651                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2652 }
2653
2654 struct ring_buffer_event *
2655 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2656                           int type,
2657                           unsigned long len,
2658                           unsigned int trace_ctx)
2659 {
2660         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2661 }
2662
2663 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2664 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2665 static int trace_buffered_event_ref;
2666
2667 /**
2668  * trace_buffered_event_enable - enable buffering events
2669  *
2670  * When events are being filtered, it is quicker to use a temporary
2671  * buffer to write the event data into if there's a likely chance
2672  * that it will not be committed. Discarding an event from the ring
2673  * buffer is not as fast as committing one, and costs much more than
2674  * the extra copy does.
2675  *
2676  * When an event is to be filtered, allocate per-CPU buffers to
2677  * write the event data into. If the event is filtered and discarded,
2678  * it is simply dropped; otherwise, the entire data is committed to
2679  * the ring buffer in one shot.
2680  */
2681 void trace_buffered_event_enable(void)
2682 {
2683         struct ring_buffer_event *event;
2684         struct page *page;
2685         int cpu;
2686
2687         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2688
2689         if (trace_buffered_event_ref++)
2690                 return;
2691
2692         for_each_tracing_cpu(cpu) {
2693                 page = alloc_pages_node(cpu_to_node(cpu),
2694                                         GFP_KERNEL | __GFP_NORETRY, 0);
2695                 if (!page)
2696                         goto failed;
2697
2698                 event = page_address(page);
2699                 memset(event, 0, sizeof(*event));
2700
2701                 per_cpu(trace_buffered_event, cpu) = event;
2702
2703                 preempt_disable();
2704                 if (cpu == smp_processor_id() &&
2705                     __this_cpu_read(trace_buffered_event) !=
2706                     per_cpu(trace_buffered_event, cpu))
2707                         WARN_ON_ONCE(1);
2708                 preempt_enable();
2709         }
2710
2711         return;
2712  failed:
2713         trace_buffered_event_disable();
2714 }
2715
2716 static void enable_trace_buffered_event(void *data)
2717 {
2718         /* Probably not needed, but do it anyway */
2719         smp_rmb();
2720         this_cpu_dec(trace_buffered_event_cnt);
2721 }
2722
2723 static void disable_trace_buffered_event(void *data)
2724 {
2725         this_cpu_inc(trace_buffered_event_cnt);
2726 }
2727
2728 /**
2729  * trace_buffered_event_disable - disable buffering events
2730  *
2731  * When a filter is removed, it is faster to not use the buffered
2732  * events, and to commit directly into the ring buffer. Free up
2733  * the temp buffers when there are no more users. This requires
2734  * special synchronization with current events.
2735  */
2736 void trace_buffered_event_disable(void)
2737 {
2738         int cpu;
2739
2740         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2741
2742         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2743                 return;
2744
2745         if (--trace_buffered_event_ref)
2746                 return;
2747
2748         preempt_disable();
2749         /* For each CPU, set the buffer as used. */
2750         smp_call_function_many(tracing_buffer_mask,
2751                                disable_trace_buffered_event, NULL, 1);
2752         preempt_enable();
2753
2754         /* Wait for all current users to finish */
2755         synchronize_rcu();
2756
2757         for_each_tracing_cpu(cpu) {
2758                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2759                 per_cpu(trace_buffered_event, cpu) = NULL;
2760         }
2761         /*
2762          * Make sure trace_buffered_event is NULL before clearing
2763          * trace_buffered_event_cnt.
2764          */
2765         smp_wmb();
2766
2767         preempt_disable();
2768         /* Do the work on each cpu */
2769         smp_call_function_many(tracing_buffer_mask,
2770                                enable_trace_buffered_event, NULL, 1);
2771         preempt_enable();
2772 }
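
/*
 * Example (sketch): callers pair the enable/disable above under event_mutex,
 * around installing and removing whatever makes discards likely (the
 * kernel-doc above mentions filters). The filter setup itself is elided:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter on the event file(s) ...
 *	mutex_unlock(&event_mutex);
 *
 *	// and later, when the filter is removed:
 *	mutex_lock(&event_mutex);
 *	// ... remove the filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */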
2773
2774 static struct trace_buffer *temp_buffer;
2775
2776 struct ring_buffer_event *
2777 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2778                           struct trace_event_file *trace_file,
2779                           int type, unsigned long len,
2780                           unsigned int trace_ctx)
2781 {
2782         struct ring_buffer_event *entry;
2783         struct trace_array *tr = trace_file->tr;
2784         int val;
2785
2786         *current_rb = tr->array_buffer.buffer;
2787
2788         if (!tr->no_filter_buffering_ref &&
2789             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2790             (entry = this_cpu_read(trace_buffered_event))) {
2791                 /*
2792                  * Filtering is on, so try to use the per cpu buffer first.
2793                  * This buffer will simulate a ring_buffer_event,
2794                  * where the type_len is zero and the array[0] will
2795                  * hold the full length.
2796                  * (see include/linux/ring_buffer.h for details on
2797                  *  how the ring_buffer_event is structured).
2798                  *
2799                  * Using a temp buffer during filtering and copying it
2800                  * on a matched filter is quicker than writing directly
2801                  * into the ring buffer and then discarding it when
2802                  * it doesn't match. That is because the discard
2803                  * requires several atomic operations to get right.
2804                  * Copying on a match and doing nothing on a failed match
2805                  * is still quicker than skipping the copy but having to
2806                  * discard out of the ring buffer on a failed match.
2807                  */
2808                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2809
2810                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2811
2812                 /*
2813                  * Preemption is disabled, but interrupts and NMIs
2814                  * can still come in now. If that happens after
2815                  * the above increment, then it will have to go
2816                  * back to the old method of allocating the event
2817                  * on the ring buffer, and if the filter fails, it
2818                  * will have to call ring_buffer_discard_commit()
2819                  * to remove it.
2820                  *
2821                  * Need to also check the unlikely case that the
2822                  * length is bigger than the temp buffer size.
2823                  * If that happens, then the reserve is pretty much
2824                  * guaranteed to fail, as the ring buffer currently
2825                  * only allows events less than a page. But that may
2826                  * change in the future, so let the ring buffer reserve
2827                  * handle the failure in that case.
2828                  */
2829                 if (val == 1 && likely(len <= max_len)) {
2830                         trace_event_setup(entry, type, trace_ctx);
2831                         entry->array[0] = len;
2832                         return entry;
2833                 }
2834                 this_cpu_dec(trace_buffered_event_cnt);
2835         }
2836
2837         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2838                                             trace_ctx);
2839         /*
2840          * If tracing is off, but we have triggers enabled
2841          * we still need to look at the event data. Use the temp_buffer
2842          * to store the trace event for the trigger to use. It's recursive
2843          * safe and will not be recorded anywhere.
2844          */
2845         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2846                 *current_rb = temp_buffer;
2847                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2848                                                     trace_ctx);
2849         }
2850         return entry;
2851 }
2852 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2853
2854 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2855 static DEFINE_MUTEX(tracepoint_printk_mutex);
2856
2857 static void output_printk(struct trace_event_buffer *fbuffer)
2858 {
2859         struct trace_event_call *event_call;
2860         struct trace_event_file *file;
2861         struct trace_event *event;
2862         unsigned long flags;
2863         struct trace_iterator *iter = tracepoint_print_iter;
2864
2865         /* We should never get here if iter is NULL */
2866         if (WARN_ON_ONCE(!iter))
2867                 return;
2868
2869         event_call = fbuffer->trace_file->event_call;
2870         if (!event_call || !event_call->event.funcs ||
2871             !event_call->event.funcs->trace)
2872                 return;
2873
2874         file = fbuffer->trace_file;
2875         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2876             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2877              !filter_match_preds(file->filter, fbuffer->entry)))
2878                 return;
2879
2880         event = &fbuffer->trace_file->event_call->event;
2881
2882         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2883         trace_seq_init(&iter->seq);
2884         iter->ent = fbuffer->entry;
2885         event_call->event.funcs->trace(iter, 0, event);
2886         trace_seq_putc(&iter->seq, 0);
2887         printk("%s", iter->seq.buffer);
2888
2889         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2890 }
2891
2892 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2893                              void *buffer, size_t *lenp,
2894                              loff_t *ppos)
2895 {
2896         int save_tracepoint_printk;
2897         int ret;
2898
2899         mutex_lock(&tracepoint_printk_mutex);
2900         save_tracepoint_printk = tracepoint_printk;
2901
2902         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2903
2904         /*
2905          * This will force exiting early, as tracepoint_printk
2906          * is always zero when tracepoint_print_iter is not allocated.
2907          */
2908         if (!tracepoint_print_iter)
2909                 tracepoint_printk = 0;
2910
2911         if (save_tracepoint_printk == tracepoint_printk)
2912                 goto out;
2913
2914         if (tracepoint_printk)
2915                 static_key_enable(&tracepoint_printk_key.key);
2916         else
2917                 static_key_disable(&tracepoint_printk_key.key);
2918
2919  out:
2920         mutex_unlock(&tracepoint_printk_mutex);
2921
2922         return ret;
2923 }
2924
2925 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2926 {
2927         enum event_trigger_type tt = ETT_NONE;
2928         struct trace_event_file *file = fbuffer->trace_file;
2929
2930         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2931                         fbuffer->entry, &tt))
2932                 goto discard;
2933
2934         if (static_key_false(&tracepoint_printk_key.key))
2935                 output_printk(fbuffer);
2936
2937         if (static_branch_unlikely(&trace_event_exports_enabled))
2938                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2939
2940         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2941                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2942
2943 discard:
2944         if (tt)
2945                 event_triggers_post_call(file, tt);
2946
2947 }
2948 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2949
2950 /*
2951  * Skip 3:
2952  *
2953  *   trace_buffer_unlock_commit_regs()
2954  *   trace_event_buffer_commit()
2955  *   trace_event_raw_event_xxx()
2956  */
2957 # define STACK_SKIP 3
2958
2959 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2960                                      struct trace_buffer *buffer,
2961                                      struct ring_buffer_event *event,
2962                                      unsigned int trace_ctx,
2963                                      struct pt_regs *regs)
2964 {
2965         __buffer_unlock_commit(buffer, event);
2966
2967         /*
2968          * If regs is not set, then skip the necessary functions.
2969          * Note, we can still get here via blktrace, wakeup tracer
2970          * and mmiotrace, but that's ok if they lose a function or
2971          * two. They are not that meaningful.
2972          */
2973         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2974         ftrace_trace_userstack(tr, buffer, trace_ctx);
2975 }
2976
2977 /*
2978  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2979  */
2980 void
2981 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2982                                    struct ring_buffer_event *event)
2983 {
2984         __buffer_unlock_commit(buffer, event);
2985 }
2986
2987 void
2988 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2989                parent_ip, unsigned int trace_ctx)
2990 {
2991         struct trace_event_call *call = &event_function;
2992         struct trace_buffer *buffer = tr->array_buffer.buffer;
2993         struct ring_buffer_event *event;
2994         struct ftrace_entry *entry;
2995
2996         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2997                                             trace_ctx);
2998         if (!event)
2999                 return;
3000         entry   = ring_buffer_event_data(event);
3001         entry->ip                       = ip;
3002         entry->parent_ip                = parent_ip;
3003
3004         if (!call_filter_check_discard(call, entry, buffer, event)) {
3005                 if (static_branch_unlikely(&trace_function_exports_enabled))
3006                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3007                 __buffer_unlock_commit(buffer, event);
3008         }
3009 }
3010
3011 #ifdef CONFIG_STACKTRACE
3012
3013 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3014 #define FTRACE_KSTACK_NESTING   4
3015
3016 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3017
3018 struct ftrace_stack {
3019         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3020 };
3021
3022
3023 struct ftrace_stacks {
3024         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3025 };
3026
3027 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3028 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3029
3030 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3031                                  unsigned int trace_ctx,
3032                                  int skip, struct pt_regs *regs)
3033 {
3034         struct trace_event_call *call = &event_kernel_stack;
3035         struct ring_buffer_event *event;
3036         unsigned int size, nr_entries;
3037         struct ftrace_stack *fstack;
3038         struct stack_entry *entry;
3039         int stackidx;
3040
3041         /*
3042          * Add one, for this function and the call to stack_trace_save().
3043          * If regs is set, then these functions will not be in the way.
3044          */
3045 #ifndef CONFIG_UNWINDER_ORC
3046         if (!regs)
3047                 skip++;
3048 #endif
3049
3050         preempt_disable_notrace();
3051
3052         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3053
3054         /* This should never happen. If it does, yell once and skip */
3055         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3056                 goto out;
3057
3058         /*
3059          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3060          * interrupt will either see the value pre increment or post
3061          * increment. If the interrupt happens pre increment it will have
3062          * restored the counter when it returns.  We just need a barrier to
3063          * keep gcc from moving things around.
3064          */
3065         barrier();
3066
3067         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3068         size = ARRAY_SIZE(fstack->calls);
3069
3070         if (regs) {
3071                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3072                                                    size, skip);
3073         } else {
3074                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3075         }
3076
3077         size = nr_entries * sizeof(unsigned long);
3078         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3079                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3080                                     trace_ctx);
3081         if (!event)
3082                 goto out;
3083         entry = ring_buffer_event_data(event);
3084
3085         memcpy(&entry->caller, fstack->calls, size);
3086         entry->size = nr_entries;
3087
3088         if (!call_filter_check_discard(call, entry, buffer, event))
3089                 __buffer_unlock_commit(buffer, event);
3090
3091  out:
3092         /* Again, don't let gcc optimize things here */
3093         barrier();
3094         __this_cpu_dec(ftrace_stack_reserve);
3095         preempt_enable_notrace();
3096
3097 }
3098
3099 static inline void ftrace_trace_stack(struct trace_array *tr,
3100                                       struct trace_buffer *buffer,
3101                                       unsigned int trace_ctx,
3102                                       int skip, struct pt_regs *regs)
3103 {
3104         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3105                 return;
3106
3107         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3108 }
3109
3110 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3111                    int skip)
3112 {
3113         struct trace_buffer *buffer = tr->array_buffer.buffer;
3114
3115         if (rcu_is_watching()) {
3116                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3117                 return;
3118         }
3119
3120         /*
3121          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3122          * but if the above rcu_is_watching() failed, then the NMI
3123          * triggered someplace critical, and rcu_irq_enter() should
3124          * not be called from NMI.
3125          */
3126         if (unlikely(in_nmi()))
3127                 return;
3128
3129         rcu_irq_enter_irqson();
3130         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3131         rcu_irq_exit_irqson();
3132 }
3133
3134 /**
3135  * trace_dump_stack - record a stack back trace in the trace buffer
3136  * @skip: Number of functions to skip (helper handlers)
3137  */
3138 void trace_dump_stack(int skip)
3139 {
3140         if (tracing_disabled || tracing_selftest_running)
3141                 return;
3142
3143 #ifndef CONFIG_UNWINDER_ORC
3144         /* Skip 1 to skip this function. */
3145         skip++;
3146 #endif
3147         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3148                              tracing_gen_ctx(), skip, NULL);
3149 }
3150 EXPORT_SYMBOL_GPL(trace_dump_stack);
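
/*
 * Example (sketch): debugging code can drop a kernel backtrace into the
 * trace buffer at an interesting point, e.g.:
 *
 *	if (suspicious_condition)	// placeholder condition
 *		trace_dump_stack(0);
 */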
3151
3152 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3153 static DEFINE_PER_CPU(int, user_stack_count);
3154
3155 static void
3156 ftrace_trace_userstack(struct trace_array *tr,
3157                        struct trace_buffer *buffer, unsigned int trace_ctx)
3158 {
3159         struct trace_event_call *call = &event_user_stack;
3160         struct ring_buffer_event *event;
3161         struct userstack_entry *entry;
3162
3163         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3164                 return;
3165
3166         /*
3167          * NMIs cannot handle page faults, even with fixups.
3168          * Saving the user stack can (and often does) fault.
3169          */
3170         if (unlikely(in_nmi()))
3171                 return;
3172
3173         /*
3174          * prevent recursion, since the user stack tracing may
3175          * trigger other kernel events.
3176          */
3177         preempt_disable();
3178         if (__this_cpu_read(user_stack_count))
3179                 goto out;
3180
3181         __this_cpu_inc(user_stack_count);
3182
3183         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3184                                             sizeof(*entry), trace_ctx);
3185         if (!event)
3186                 goto out_drop_count;
3187         entry   = ring_buffer_event_data(event);
3188
3189         entry->tgid             = current->tgid;
3190         memset(&entry->caller, 0, sizeof(entry->caller));
3191
3192         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3193         if (!call_filter_check_discard(call, entry, buffer, event))
3194                 __buffer_unlock_commit(buffer, event);
3195
3196  out_drop_count:
3197         __this_cpu_dec(user_stack_count);
3198  out:
3199         preempt_enable();
3200 }
3201 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3202 static void ftrace_trace_userstack(struct trace_array *tr,
3203                                    struct trace_buffer *buffer,
3204                                    unsigned int trace_ctx)
3205 {
3206 }
3207 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3208
3209 #endif /* CONFIG_STACKTRACE */
3210
3211 static inline void
3212 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3213                           unsigned long long delta)
3214 {
3215         entry->bottom_delta_ts = delta & U32_MAX;
3216         entry->top_delta_ts = (delta >> 32);
3217 }
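
/*
 * The 64-bit delta is stored as two 32-bit halves; a reader reconstructs it
 * with:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */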
3218
3219 void trace_last_func_repeats(struct trace_array *tr,
3220                              struct trace_func_repeats *last_info,
3221                              unsigned int trace_ctx)
3222 {
3223         struct trace_buffer *buffer = tr->array_buffer.buffer;
3224         struct func_repeats_entry *entry;
3225         struct ring_buffer_event *event;
3226         u64 delta;
3227
3228         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3229                                             sizeof(*entry), trace_ctx);
3230         if (!event)
3231                 return;
3232
3233         delta = ring_buffer_event_time_stamp(buffer, event) -
3234                 last_info->ts_last_call;
3235
3236         entry = ring_buffer_event_data(event);
3237         entry->ip = last_info->ip;
3238         entry->parent_ip = last_info->parent_ip;
3239         entry->count = last_info->count;
3240         func_repeats_set_delta_ts(entry, delta);
3241
3242         __buffer_unlock_commit(buffer, event);
3243 }
3244
3245 /* created for use with alloc_percpu */
3246 struct trace_buffer_struct {
3247         int nesting;
3248         char buffer[4][TRACE_BUF_SIZE];
3249 };
3250
3251 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3252
3253 /*
3254  * This allows for lockless recording.  If we're nested too deeply, then
3255  * this returns NULL.
3256  */
3257 static char *get_trace_buf(void)
3258 {
3259         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3260
3261         if (!trace_percpu_buffer || buffer->nesting >= 4)
3262                 return NULL;
3263
3264         buffer->nesting++;
3265
3266         /* Interrupts must see nesting incremented before we use the buffer */
3267         barrier();
3268         return &buffer->buffer[buffer->nesting - 1][0];
3269 }
3270
3271 static void put_trace_buf(void)
3272 {
3273         /* Don't let the decrement of nesting leak before prior buffer use */
3274         barrier();
3275         this_cpu_dec(trace_percpu_buffer->nesting);
3276 }
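
/*
 * Illustrative sketch (not in the original source) of the intended
 * pairing around the per-CPU nesting buffer, mirroring how
 * trace_vbprintk() below uses it. Preemption must stay disabled so the
 * task cannot migrate between get_trace_buf() and put_trace_buf():
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		// format at most TRACE_BUF_SIZE bytes into tbuffer
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */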
3277
3278 static int alloc_percpu_trace_buffer(void)
3279 {
3280         struct trace_buffer_struct __percpu *buffers;
3281
3282         if (trace_percpu_buffer)
3283                 return 0;
3284
3285         buffers = alloc_percpu(struct trace_buffer_struct);
3286         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3287                 return -ENOMEM;
3288
3289         trace_percpu_buffer = buffers;
3290         return 0;
3291 }
3292
3293 static int buffers_allocated;
3294
3295 void trace_printk_init_buffers(void)
3296 {
3297         if (buffers_allocated)
3298                 return;
3299
3300         if (alloc_percpu_trace_buffer())
3301                 return;
3302
3303         /* trace_printk() is for debug use only. Don't use it in production. */
3304
3305         pr_warn("\n");
3306         pr_warn("**********************************************************\n");
3307         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3308         pr_warn("**                                                      **\n");
3309         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3310         pr_warn("**                                                      **\n");
3311         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3312         pr_warn("** unsafe for production use.                           **\n");
3313         pr_warn("**                                                      **\n");
3314         pr_warn("** If you see this message and you are not debugging    **\n");
3315         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3316         pr_warn("**                                                      **\n");
3317         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3318         pr_warn("**********************************************************\n");
3319
3320         /* Expand the buffers to the set size */
3321         tracing_update_buffers();
3322
3323         buffers_allocated = 1;
3324
3325         /*
3326          * trace_printk_init_buffers() can be called by modules.
3327          * If that happens, then we need to start cmdline recording
3328          * directly here. If global_trace.array_buffer.buffer is already
3329          * allocated at this point, then this was called by module code.
3330          */
3331         if (global_trace.array_buffer.buffer)
3332                 tracing_start_cmdline_record();
3333 }
3334 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3335
3336 void trace_printk_start_comm(void)
3337 {
3338         /* Start tracing comms if trace printk is set */
3339         if (!buffers_allocated)
3340                 return;
3341         tracing_start_cmdline_record();
3342 }
3343
3344 static void trace_printk_start_stop_comm(int enabled)
3345 {
3346         if (!buffers_allocated)
3347                 return;
3348
3349         if (enabled)
3350                 tracing_start_cmdline_record();
3351         else
3352                 tracing_stop_cmdline_record();
3353 }
3354
3355 /**
3356  * trace_vbprintk - write binary msg to tracing buffer
3357  * @ip:    The address of the caller
3358  * @fmt:   The string format to write to the buffer
3359  * @args:  Arguments for @fmt
3360  */
3361 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3362 {
3363         struct trace_event_call *call = &event_bprint;
3364         struct ring_buffer_event *event;
3365         struct trace_buffer *buffer;
3366         struct trace_array *tr = &global_trace;
3367         struct bprint_entry *entry;
3368         unsigned int trace_ctx;
3369         char *tbuffer;
3370         int len = 0, size;
3371
3372         if (unlikely(tracing_selftest_running || tracing_disabled))
3373                 return 0;
3374
3375         /* Don't pollute graph traces with trace_vprintk internals */
3376         pause_graph_tracing();
3377
3378         trace_ctx = tracing_gen_ctx();
3379         preempt_disable_notrace();
3380
3381         tbuffer = get_trace_buf();
3382         if (!tbuffer) {
3383                 len = 0;
3384                 goto out_nobuffer;
3385         }
3386
3387         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3388
3389         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3390                 goto out_put;
3391
3392         size = sizeof(*entry) + sizeof(u32) * len;
3393         buffer = tr->array_buffer.buffer;
3394         ring_buffer_nest_start(buffer);
3395         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3396                                             trace_ctx);
3397         if (!event)
3398                 goto out;
3399         entry = ring_buffer_event_data(event);
3400         entry->ip                       = ip;
3401         entry->fmt                      = fmt;
3402
3403         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3404         if (!call_filter_check_discard(call, entry, buffer, event)) {
3405                 __buffer_unlock_commit(buffer, event);
3406                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3407         }
3408
3409 out:
3410         ring_buffer_nest_end(buffer);
3411 out_put:
3412         put_trace_buf();
3413
3414 out_nobuffer:
3415         preempt_enable_notrace();
3416         unpause_graph_tracing();
3417
3418         return len;
3419 }
3420 EXPORT_SYMBOL_GPL(trace_vbprintk);
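
/*
 * Illustrative sketch (not part of the original source): a varargs front
 * end hands its va_list straight to trace_vbprintk(), passing the call
 * site as @ip. This mirrors what the trace_printk() machinery does;
 * "example_bprintk" is a hypothetical name used only for illustration:
 *
 *	static int example_bprintk(const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(_THIS_IP_, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */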
3421
3422 __printf(3, 0)
3423 static int
3424 __trace_array_vprintk(struct trace_buffer *buffer,
3425                       unsigned long ip, const char *fmt, va_list args)
3426 {
3427         struct trace_event_call *call = &event_print;
3428         struct ring_buffer_event *event;
3429         int len = 0, size;
3430         struct print_entry *entry;
3431         unsigned int trace_ctx;
3432         char *tbuffer;
3433
3434         if (tracing_disabled || tracing_selftest_running)
3435                 return 0;
3436
3437         /* Don't pollute graph traces with trace_vprintk internals */
3438         pause_graph_tracing();
3439
3440         trace_ctx = tracing_gen_ctx();
3441         preempt_disable_notrace();
3442
3443
3444         tbuffer = get_trace_buf();
3445         if (!tbuffer) {
3446                 len = 0;
3447                 goto out_nobuffer;
3448         }
3449
3450         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3451
3452         size = sizeof(*entry) + len + 1;
3453         ring_buffer_nest_start(buffer);
3454         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3455                                             trace_ctx);
3456         if (!event)
3457                 goto out;
3458         entry = ring_buffer_event_data(event);
3459         entry->ip = ip;
3460
3461         memcpy(&entry->buf, tbuffer, len + 1);
3462         if (!call_filter_check_discard(call, entry, buffer, event)) {
3463                 __buffer_unlock_commit(buffer, event);
3464                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3465         }
3466
3467 out:
3468         ring_buffer_nest_end(buffer);
3469         put_trace_buf();
3470
3471 out_nobuffer:
3472         preempt_enable_notrace();
3473         unpause_graph_tracing();
3474
3475         return len;
3476 }
3477
3478 __printf(3, 0)
3479 int trace_array_vprintk(struct trace_array *tr,
3480                         unsigned long ip, const char *fmt, va_list args)
3481 {
3482         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3483 }
3484
3485 /**
3486  * trace_array_printk - Print a message to a specific instance
3487  * @tr: The instance trace_array descriptor
3488  * @ip: The instruction pointer that this is called from.
3489  * @fmt: The format to print (printf format)
3490  *
3491  * If a subsystem sets up its own instance, it may write printk-style
3492  * strings into its tracing instance buffer using this function.
3493  * Note, this function will not write into the top level buffer
3494  * (use trace_printk() for that), as the top level buffer should only
3495  * contain events that can be individually disabled.
3496  * trace_printk() is only meant for debugging a kernel, and should
3497  * never be incorporated into normal use.
3498  *
3499  * trace_array_printk() can be used, as it will not add noise to the
3500  * top level tracing buffer.
3501  *
3502  * Note, trace_array_init_printk() must be called on @tr before this
3503  * can be used.
3504  */
3505 __printf(3, 0)
3506 int trace_array_printk(struct trace_array *tr,
3507                        unsigned long ip, const char *fmt, ...)
3508 {
3509         int ret;
3510         va_list ap;
3511
3512         if (!tr)
3513                 return -ENOENT;
3514
3515         /* This is only allowed for created instances */
3516         if (tr == &global_trace)
3517                 return 0;
3518
3519         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3520                 return 0;
3521
3522         va_start(ap, fmt);
3523         ret = trace_array_vprintk(tr, ip, fmt, ap);
3524         va_end(ap);
3525         return ret;
3526 }
3527 EXPORT_SYMBOL_GPL(trace_array_printk);
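
/*
 * Illustrative usage (not in the original source), assuming a subsystem
 * that owns its own tracing instance; the instance name and variables
 * below are hypothetical:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "queue depth: %d\n", depth);
 */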
3528
3529 /**
3530  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3531  * @tr: The trace array to initialize the buffers for
3532  *
3533  * As trace_array_printk() only writes into instances, such calls are OK
3534  * to have in the kernel (unlike trace_printk()). This needs to be called
3535  * before trace_array_printk() can be used on a trace_array.
3536  */
3537 int trace_array_init_printk(struct trace_array *tr)
3538 {
3539         if (!tr)
3540                 return -ENOENT;
3541
3542         /* This is only allowed for created instances */
3543         if (tr == &global_trace)
3544                 return -EINVAL;
3545
3546         return alloc_percpu_trace_buffer();
3547 }
3548 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3549
3550 __printf(3, 4)
3551 int trace_array_printk_buf(struct trace_buffer *buffer,
3552                            unsigned long ip, const char *fmt, ...)
3553 {
3554         int ret;
3555         va_list ap;
3556
3557         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3558                 return 0;
3559
3560         va_start(ap, fmt);
3561         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3562         va_end(ap);
3563         return ret;
3564 }
3565
3566 __printf(2, 0)
3567 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3568 {
3569         return trace_array_vprintk(&global_trace, ip, fmt, args);
3570 }
3571 EXPORT_SYMBOL_GPL(trace_vprintk);
3572
3573 static void trace_iterator_increment(struct trace_iterator *iter)
3574 {
3575         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3576
3577         iter->idx++;
3578         if (buf_iter)
3579                 ring_buffer_iter_advance(buf_iter);
3580 }
3581
3582 static struct trace_entry *
3583 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3584                 unsigned long *lost_events)
3585 {
3586         struct ring_buffer_event *event;
3587         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3588
3589         if (buf_iter) {
3590                 event = ring_buffer_iter_peek(buf_iter, ts);
3591                 if (lost_events)
3592                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3593                                 (unsigned long)-1 : 0;
3594         } else {
3595                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3596                                          lost_events);
3597         }
3598
3599         if (event) {
3600                 iter->ent_size = ring_buffer_event_length(event);
3601                 return ring_buffer_event_data(event);
3602         }
3603         iter->ent_size = 0;
3604         return NULL;
3605 }
3606
3607 static struct trace_entry *
3608 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3609                   unsigned long *missing_events, u64 *ent_ts)
3610 {
3611         struct trace_buffer *buffer = iter->array_buffer->buffer;
3612         struct trace_entry *ent, *next = NULL;
3613         unsigned long lost_events = 0, next_lost = 0;
3614         int cpu_file = iter->cpu_file;
3615         u64 next_ts = 0, ts;
3616         int next_cpu = -1;
3617         int next_size = 0;
3618         int cpu;
3619
3620         /*
3621          * If we are in a per_cpu trace file, don't bother iterating over
3622          * all CPUs; peek at that CPU directly.
3623          */
3624         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3625                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3626                         return NULL;
3627                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3628                 if (ent_cpu)
3629                         *ent_cpu = cpu_file;
3630
3631                 return ent;
3632         }
3633
3634         for_each_tracing_cpu(cpu) {
3635
3636                 if (ring_buffer_empty_cpu(buffer, cpu))
3637                         continue;
3638
3639                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3640
3641                 /*
3642                  * Pick the entry with the smallest timestamp:
3643                  */
3644                 if (ent && (!next || ts < next_ts)) {
3645                         next = ent;
3646                         next_cpu = cpu;
3647                         next_ts = ts;
3648                         next_lost = lost_events;
3649                         next_size = iter->ent_size;
3650                 }
3651         }
3652
3653         iter->ent_size = next_size;
3654
3655         if (ent_cpu)
3656                 *ent_cpu = next_cpu;
3657
3658         if (ent_ts)
3659                 *ent_ts = next_ts;
3660
3661         if (missing_events)
3662                 *missing_events = next_lost;
3663
3664         return next;
3665 }
3666
3667 #define STATIC_FMT_BUF_SIZE     128
3668 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3669
3670 static char *trace_iter_expand_format(struct trace_iterator *iter)
3671 {
3672         char *tmp;
3673
3674         /*
3675          * iter->tr is NULL when used with tp_printk, which makes
3676          * this get called where it is not safe to call krealloc().
3677          */
3678         if (!iter->tr || iter->fmt == static_fmt_buf)
3679                 return NULL;
3680
3681         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3682                        GFP_KERNEL);
3683         if (tmp) {
3684                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3685                 iter->fmt = tmp;
3686         }
3687
3688         return tmp;
3689 }
3690
3691 /* Returns true if the string is safe to dereference from an event */
3692 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3693                            bool star, int len)
3694 {
3695         unsigned long addr = (unsigned long)str;
3696         struct trace_event *trace_event;
3697         struct trace_event_call *event;
3698
3699         /* Ignore strings with no length */
3700         if (star && !len)
3701                 return true;
3702
3703         /* OK if part of the event data */
3704         if ((addr >= (unsigned long)iter->ent) &&
3705             (addr < (unsigned long)iter->ent + iter->ent_size))
3706                 return true;
3707
3708         /* OK if part of the temp seq buffer */
3709         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3710             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3711                 return true;
3712
3713         /* Core rodata can not be freed */
3714         if (is_kernel_rodata(addr))
3715                 return true;
3716
3717         if (trace_is_tracepoint_string(str))
3718                 return true;
3719
3720         /*
3721          * Now this could be a module event, referencing core module
3722          * data, which is OK.
3723          */
3724         if (!iter->ent)
3725                 return false;
3726
3727         trace_event = ftrace_find_event(iter->ent->type);
3728         if (!trace_event)
3729                 return false;
3730
3731         event = container_of(trace_event, struct trace_event_call, event);
3732         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3733                 return false;
3734
3735         /* Would rather have rodata, but this will suffice */
3736         if (within_module_core(addr, event->module))
3737                 return true;
3738
3739         return false;
3740 }
3741
3742 static const char *show_buffer(struct trace_seq *s)
3743 {
3744         struct seq_buf *seq = &s->seq;
3745
3746         seq_buf_terminate(seq);
3747
3748         return seq->buffer;
3749 }
3750
3751 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3752
3753 static int test_can_verify_check(const char *fmt, ...)
3754 {
3755         char buf[16];
3756         va_list ap;
3757         int ret;
3758
3759         /*
3760          * The verifier depends on vsnprintf() modifying the va_list
3761          * passed to it, i.e. the va_list being passed by reference. Some
3762          * architectures (like x86_32) pass it by value, which means that
3763          * vsnprintf() does not modify the caller's va_list, and the
3764          * verifier would then need to understand all the values that
3765          * vsnprintf can consume. If it is passed by value, the verifier
3766          * is disabled.
3767          */
3768         va_start(ap, fmt);
3769         vsnprintf(buf, 16, "%d", ap);
3770         ret = va_arg(ap, int);
3771         va_end(ap);
3772
3773         return ret;
3774 }
3775
3776 static void test_can_verify(void)
3777 {
3778         if (!test_can_verify_check("%d %d", 0, 1)) {
3779                 pr_info("trace event string verifier disabled\n");
3780                 static_branch_inc(&trace_no_verify);
3781         }
3782 }
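
/*
 * Descriptive note (not in the original source): test_can_verify() above
 * passes "%d %d", 0, 1 to test_can_verify_check(), which prints only the
 * first argument via vsnprintf(). If the va_list was advanced (passed by
 * reference), the following va_arg() returns the second argument, 1, and
 * the verifier stays enabled. If the architecture passes the va_list by
 * value, va_arg() returns the first argument, 0, and the string verifier
 * is disabled.
 */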
3783
3784 /**
3785  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3786  * @iter: The iterator that holds the seq buffer and the event being printed
3787  * @fmt: The format used to print the event
3788  * @ap: The va_list holding the data to print from @fmt.
3789  *
3790  * This writes the data into the @iter->seq buffer using the data from
3791  * @fmt and @ap. If the format has a %s, then the source of the string
3792  * is examined to make sure it is safe to print, otherwise it will
3793  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3794  * pointer.
3795  */
3796 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3797                          va_list ap)
3798 {
3799         const char *p = fmt;
3800         const char *str;
3801         int i, j;
3802
3803         if (WARN_ON_ONCE(!fmt))
3804                 return;
3805
3806         if (static_branch_unlikely(&trace_no_verify))
3807                 goto print;
3808
3809         /* Don't bother checking when doing a ftrace_dump() */
3810         if (iter->fmt == static_fmt_buf)
3811                 goto print;
3812
3813         while (*p) {
3814                 bool star = false;
3815                 int len = 0;
3816
3817                 j = 0;
3818
3819                 /* We only care about %s and variants */
3820                 for (i = 0; p[i]; i++) {
3821                         if (i + 1 >= iter->fmt_size) {
3822                                 /*
3823                                  * If we can't expand the copy buffer,
3824                                  * just print it.
3825                                  */
3826                                 if (!trace_iter_expand_format(iter))
3827                                         goto print;
3828                         }
3829
3830                         if (p[i] == '\\' && p[i+1]) {
3831                                 i++;
3832                                 continue;
3833                         }
3834                         if (p[i] == '%') {
3835                                 /* Need to test cases like %08.*s */
3836                                 for (j = 1; p[i+j]; j++) {
3837                                         if (isdigit(p[i+j]) ||
3838                                             p[i+j] == '.')
3839                                                 continue;
3840                                         if (p[i+j] == '*') {
3841                                                 star = true;
3842                                                 continue;
3843                                         }
3844                                         break;
3845                                 }
3846                                 if (p[i+j] == 's')
3847                                         break;
3848                                 star = false;
3849                         }
3850                         j = 0;
3851                 }
3852                 /* If no %s found then just print normally */
3853                 if (!p[i])
3854                         break;
3855
3856                 /* Copy up to the %s, and print that */
3857                 strncpy(iter->fmt, p, i);
3858                 iter->fmt[i] = '\0';
3859                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3860
3861                 /*
3862                  * If iter->seq is full, the above call no longer guarantees
3863                  * that ap is in sync with fmt processing, and further calls
3864                  * to va_arg() can return wrong positional arguments.
3865                  *
3866                  * Ensure that ap is no longer used in this case.
3867                  */
3868                 if (iter->seq.full) {
3869                         p = "";
3870                         break;
3871                 }
3872
3873                 if (star)
3874                         len = va_arg(ap, int);
3875
3876                 /* The ap now points to the string data of the %s */
3877                 str = va_arg(ap, const char *);
3878
3879                 /*
3880                  * If you hit this warning, it is likely that the
3881                  * trace event in question used %s on a string that
3882                  * was saved at the time of the event, but may not be
3883                  * around when the trace is read. Use __string(),
3884                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3885                  * instead. See samples/trace_events/trace-events-sample.h
3886                  * for reference.
3887                  */
3888                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3889                               "fmt: '%s' current_buffer: '%s'",
3890                               fmt, show_buffer(&iter->seq))) {
3891                         int ret;
3892
3893                         /* Try to safely read the string */
3894                         if (star) {
3895                                 if (len + 1 > iter->fmt_size)
3896                                         len = iter->fmt_size - 1;
3897                                 if (len < 0)
3898                                         len = 0;
3899                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3900                                 iter->fmt[len] = 0;
3901                                 star = false;
3902                         } else {
3903                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3904                                                                   iter->fmt_size);
3905                         }
3906                         if (ret < 0)
3907                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3908                         else
3909                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3910                                                  str, iter->fmt);
3911                         str = "[UNSAFE-MEMORY]";
3912                         strcpy(iter->fmt, "%s");
3913                 } else {
3914                         strncpy(iter->fmt, p + i, j + 1);
3915                         iter->fmt[j+1] = '\0';
3916                 }
3917                 if (star)
3918                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3919                 else
3920                         trace_seq_printf(&iter->seq, iter->fmt, str);
3921
3922                 p += i + j + 1;
3923         }
3924  print:
3925         if (*p)
3926                 trace_seq_vprintf(&iter->seq, p, ap);
3927 }
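
/*
 * Illustrative sketch (not in the original source) of the pattern the
 * warning above recommends for string fields: copy the string into the
 * event itself with __string()/__assign_str() and print it back with
 * __get_str(), rather than recording a bare pointer. The event and field
 * names are hypothetical; see samples/trace_events/trace-events-sample.h:
 *
 *	TRACE_EVENT(example_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */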
3928
3929 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3930 {
3931         const char *p, *new_fmt;
3932         char *q;
3933
3934         if (WARN_ON_ONCE(!fmt))
3935                 return fmt;
3936
3937         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3938                 return fmt;
3939
3940         p = fmt;
3941         new_fmt = q = iter->fmt;
3942         while (*p) {
3943                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3944                         if (!trace_iter_expand_format(iter))
3945                                 return fmt;
3946
3947                         q += iter->fmt - new_fmt;
3948                         new_fmt = iter->fmt;
3949                 }
3950
3951                 *q++ = *p++;
3952
3953                 /* Replace %p with %px */
3954                 if (p[-1] == '%') {
3955                         if (p[0] == '%') {
3956                                 *q++ = *p++;
3957                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3958                                 *q++ = *p++;
3959                                 *q++ = 'x';
3960                         }
3961                 }
3962         }
3963         *q = '\0';
3964
3965         return new_fmt;
3966 }
3967
3968 #define STATIC_TEMP_BUF_SIZE    128
3969 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3970
3971 /* Find the next real entry, without updating the iterator itself */
3972 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3973                                           int *ent_cpu, u64 *ent_ts)
3974 {
3975         /* __find_next_entry will reset ent_size */
3976         int ent_size = iter->ent_size;
3977         struct trace_entry *entry;
3978
3979         /*
3980          * If called from ftrace_dump(), then the iter->temp buffer
3981          * will be the static_temp_buf and not created from kmalloc.
3982          * If the entry size is greater than the buffer, we cannot
3983          * save it. Just return NULL in that case. This is only
3984          * used to add markers when two consecutive events' time
3985          * stamps have a large delta. See trace_print_lat_context().
3986          */
3987         if (iter->temp == static_temp_buf &&
3988             STATIC_TEMP_BUF_SIZE < ent_size)
3989                 return NULL;
3990
3991         /*
3992          * The __find_next_entry() may call peek_next_entry(), which may
3993          * call ring_buffer_peek() that may make the contents of iter->ent
3994          * undefined. Need to copy iter->ent now.
3995          */
3996         if (iter->ent && iter->ent != iter->temp) {
3997                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3998                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3999                         void *temp;
4000                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4001                         if (!temp)
4002                                 return NULL;
4003                         kfree(iter->temp);
4004                         iter->temp = temp;
4005                         iter->temp_size = iter->ent_size;
4006                 }
4007                 memcpy(iter->temp, iter->ent, iter->ent_size);
4008                 iter->ent = iter->temp;
4009         }
4010         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4011         /* Put back the original ent_size */
4012         iter->ent_size = ent_size;
4013
4014         return entry;
4015 }
4016
4017 /* Find the next real entry, and increment the iterator to the next entry */
4018 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4019 {
4020         iter->ent = __find_next_entry(iter, &iter->cpu,
4021                                       &iter->lost_events, &iter->ts);
4022
4023         if (iter->ent)
4024                 trace_iterator_increment(iter);
4025
4026         return iter->ent ? iter : NULL;
4027 }
4028
4029 static void trace_consume(struct trace_iterator *iter)
4030 {
4031         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4032                             &iter->lost_events);
4033 }
4034
4035 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4036 {
4037         struct trace_iterator *iter = m->private;
4038         int i = (int)*pos;
4039         void *ent;
4040
4041         WARN_ON_ONCE(iter->leftover);
4042
4043         (*pos)++;
4044
4045         /* can't go backwards */
4046         if (iter->idx > i)
4047                 return NULL;
4048
4049         if (iter->idx < 0)
4050                 ent = trace_find_next_entry_inc(iter);
4051         else
4052                 ent = iter;
4053
4054         while (ent && iter->idx < i)
4055                 ent = trace_find_next_entry_inc(iter);
4056
4057         iter->pos = *pos;
4058
4059         return ent;
4060 }
4061
4062 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4063 {
4064         struct ring_buffer_iter *buf_iter;
4065         unsigned long entries = 0;
4066         u64 ts;
4067
4068         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4069
4070         buf_iter = trace_buffer_iter(iter, cpu);
4071         if (!buf_iter)
4072                 return;
4073
4074         ring_buffer_iter_reset(buf_iter);
4075
4076         /*
4077          * With the max latency tracers, we could have the case that
4078          * a reset never took place on a CPU. This is evidenced by
4079          * the timestamp being before the start of the buffer.
4080          */
4081         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4082                 if (ts >= iter->array_buffer->time_start)
4083                         break;
4084                 entries++;
4085                 ring_buffer_iter_advance(buf_iter);
4086         }
4087
4088         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4089 }
4090
4091 /*
4092  * The current tracer is copied to avoid taking a global lock
4093  * all around.
4094  */
4095 static void *s_start(struct seq_file *m, loff_t *pos)
4096 {
4097         struct trace_iterator *iter = m->private;
4098         struct trace_array *tr = iter->tr;
4099         int cpu_file = iter->cpu_file;
4100         void *p = NULL;
4101         loff_t l = 0;
4102         int cpu;
4103
4104         /*
4105          * copy the tracer to avoid using a global lock all around.
4106          * iter->trace is a copy of current_trace, so the name pointer
4107          * may be compared instead of using strcmp(), as iter->trace->name
4108          * will point to the same string as current_trace->name.
4109          */
4110         mutex_lock(&trace_types_lock);
4111         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4112                 /* Close iter->trace before switching to the new current tracer */
4113                 if (iter->trace->close)
4114                         iter->trace->close(iter);
4115                 *iter->trace = *tr->current_trace;
4116                 /* Reopen the new current tracer */
4117                 if (iter->trace->open)
4118                         iter->trace->open(iter);
4119         }
4120         mutex_unlock(&trace_types_lock);
4121
4122 #ifdef CONFIG_TRACER_MAX_TRACE
4123         if (iter->snapshot && iter->trace->use_max_tr)
4124                 return ERR_PTR(-EBUSY);
4125 #endif
4126
4127         if (*pos != iter->pos) {
4128                 iter->ent = NULL;
4129                 iter->cpu = 0;
4130                 iter->idx = -1;
4131
4132                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4133                         for_each_tracing_cpu(cpu)
4134                                 tracing_iter_reset(iter, cpu);
4135                 } else
4136                         tracing_iter_reset(iter, cpu_file);
4137
4138                 iter->leftover = 0;
4139                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4140                         ;
4141
4142         } else {
4143                 /*
4144                  * If we overflowed the seq_file before, then we want
4145                  * to just reuse the trace_seq buffer again.
4146                  */
4147                 if (iter->leftover)
4148                         p = iter;
4149                 else {
4150                         l = *pos - 1;
4151                         p = s_next(m, p, &l);
4152                 }
4153         }
4154
4155         trace_event_read_lock();
4156         trace_access_lock(cpu_file);
4157         return p;
4158 }
4159
4160 static void s_stop(struct seq_file *m, void *p)
4161 {
4162         struct trace_iterator *iter = m->private;
4163
4164 #ifdef CONFIG_TRACER_MAX_TRACE
4165         if (iter->snapshot && iter->trace->use_max_tr)
4166                 return;
4167 #endif
4168
4169         trace_access_unlock(iter->cpu_file);
4170         trace_event_read_unlock();
4171 }
4172
4173 static void
4174 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4175                       unsigned long *entries, int cpu)
4176 {
4177         unsigned long count;
4178
4179         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4180         /*
4181          * If this buffer has skipped entries, then we hold all
4182          * entries for the trace and we need to ignore the
4183          * ones before the time stamp.
4184          */
4185         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4186                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4187                 /* total is the same as the entries */
4188                 *total = count;
4189         } else
4190                 *total = count +
4191                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4192         *entries = count;
4193 }
4194
4195 static void
4196 get_total_entries(struct array_buffer *buf,
4197                   unsigned long *total, unsigned long *entries)
4198 {
4199         unsigned long t, e;
4200         int cpu;
4201
4202         *total = 0;
4203         *entries = 0;
4204
4205         for_each_tracing_cpu(cpu) {
4206                 get_total_entries_cpu(buf, &t, &e, cpu);
4207                 *total += t;
4208                 *entries += e;
4209         }
4210 }
4211
4212 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4213 {
4214         unsigned long total, entries;
4215
4216         if (!tr)
4217                 tr = &global_trace;
4218
4219         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4220
4221         return entries;
4222 }
4223
4224 unsigned long trace_total_entries(struct trace_array *tr)
4225 {
4226         unsigned long total, entries;
4227
4228         if (!tr)
4229                 tr = &global_trace;
4230
4231         get_total_entries(&tr->array_buffer, &total, &entries);
4232
4233         return entries;
4234 }
4235
4236 static void print_lat_help_header(struct seq_file *m)
4237 {
4238         seq_puts(m, "#                    _------=> CPU#            \n"
4239                     "#                   / _-----=> irqs-off        \n"
4240                     "#                  | / _----=> need-resched    \n"
4241                     "#                  || / _---=> hardirq/softirq \n"
4242                     "#                  ||| / _--=> preempt-depth   \n"
4243                     "#                  |||| / _-=> migrate-disable \n"
4244                     "#                  ||||| /     delay           \n"
4245                     "#  cmd     pid     |||||| time  |   caller     \n"
4246                     "#     \\   /        ||||||  \\    |    /       \n");
4247 }
4248
4249 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4250 {
4251         unsigned long total;
4252         unsigned long entries;
4253
4254         get_total_entries(buf, &total, &entries);
4255         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4256                    entries, total, num_online_cpus());
4257         seq_puts(m, "#\n");
4258 }
4259
4260 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4261                                    unsigned int flags)
4262 {
4263         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4264
4265         print_event_info(buf, m);
4266
4267         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4268         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4269 }
4270
4271 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4272                                        unsigned int flags)
4273 {
4274         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4275         const char *space = "            ";
4276         int prec = tgid ? 12 : 2;
4277
4278         print_event_info(buf, m);
4279
4280         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4281         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4282         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4283         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4284         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4285         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4286         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4287         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4288 }
4289
4290 void
4291 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4292 {
4293         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4294         struct array_buffer *buf = iter->array_buffer;
4295         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4296         struct tracer *type = iter->trace;
4297         unsigned long entries;
4298         unsigned long total;
4299         const char *name = "preemption";
4300
4301         name = type->name;
4302
4303         get_total_entries(buf, &total, &entries);
4304
4305         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4306                    name, UTS_RELEASE);
4307         seq_puts(m, "# -----------------------------------"
4308                  "---------------------------------\n");
4309         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4310                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4311                    nsecs_to_usecs(data->saved_latency),
4312                    entries,
4313                    total,
4314                    buf->cpu,
4315 #if defined(CONFIG_PREEMPT_NONE)
4316                    "server",
4317 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4318                    "desktop",
4319 #elif defined(CONFIG_PREEMPT)
4320                    "preempt",
4321 #elif defined(CONFIG_PREEMPT_RT)
4322                    "preempt_rt",
4323 #else
4324                    "unknown",
4325 #endif
4326                    /* These are reserved for later use */
4327                    0, 0, 0, 0);
4328 #ifdef CONFIG_SMP
4329         seq_printf(m, " #P:%d)\n", num_online_cpus());
4330 #else
4331         seq_puts(m, ")\n");
4332 #endif
4333         seq_puts(m, "#    -----------------\n");
4334         seq_printf(m, "#    | task: %.16s-%d "
4335                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4336                    data->comm, data->pid,
4337                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4338                    data->policy, data->rt_priority);
4339         seq_puts(m, "#    -----------------\n");
4340
4341         if (data->critical_start) {
4342                 seq_puts(m, "#  => started at: ");
4343                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4344                 trace_print_seq(m, &iter->seq);
4345                 seq_puts(m, "\n#  => ended at:   ");
4346                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4347                 trace_print_seq(m, &iter->seq);
4348                 seq_puts(m, "\n#\n");
4349         }
4350
4351         seq_puts(m, "#\n");
4352 }
4353
4354 static void test_cpu_buff_start(struct trace_iterator *iter)
4355 {
4356         struct trace_seq *s = &iter->seq;
4357         struct trace_array *tr = iter->tr;
4358
4359         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4360                 return;
4361
4362         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4363                 return;
4364
4365         if (cpumask_available(iter->started) &&
4366             cpumask_test_cpu(iter->cpu, iter->started))
4367                 return;
4368
4369         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4370                 return;
4371
4372         if (cpumask_available(iter->started))
4373                 cpumask_set_cpu(iter->cpu, iter->started);
4374
4375         /* Don't print started cpu buffer for the first entry of the trace */
4376         if (iter->idx > 1)
4377                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4378                                 iter->cpu);
4379 }
4380
4381 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4382 {
4383         struct trace_array *tr = iter->tr;
4384         struct trace_seq *s = &iter->seq;
4385         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4386         struct trace_entry *entry;
4387         struct trace_event *event;
4388
4389         entry = iter->ent;
4390
4391         test_cpu_buff_start(iter);
4392
4393         event = ftrace_find_event(entry->type);
4394
4395         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4397                         trace_print_lat_context(iter);
4398                 else
4399                         trace_print_context(iter);
4400         }
4401
4402         if (trace_seq_has_overflowed(s))
4403                 return TRACE_TYPE_PARTIAL_LINE;
4404
4405         if (event)
4406                 return event->funcs->trace(iter, sym_flags, event);
4407
4408         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4409
4410         return trace_handle_return(s);
4411 }
4412
4413 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4414 {
4415         struct trace_array *tr = iter->tr;
4416         struct trace_seq *s = &iter->seq;
4417         struct trace_entry *entry;
4418         struct trace_event *event;
4419
4420         entry = iter->ent;
4421
4422         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4423                 trace_seq_printf(s, "%d %d %llu ",
4424                                  entry->pid, iter->cpu, iter->ts);
4425
4426         if (trace_seq_has_overflowed(s))
4427                 return TRACE_TYPE_PARTIAL_LINE;
4428
4429         event = ftrace_find_event(entry->type);
4430         if (event)
4431                 return event->funcs->raw(iter, 0, event);
4432
4433         trace_seq_printf(s, "%d ?\n", entry->type);
4434
4435         return trace_handle_return(s);
4436 }
4437
4438 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4439 {
4440         struct trace_array *tr = iter->tr;
4441         struct trace_seq *s = &iter->seq;
4442         unsigned char newline = '\n';
4443         struct trace_entry *entry;
4444         struct trace_event *event;
4445
4446         entry = iter->ent;
4447
4448         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4449                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4450                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4451                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4452                 if (trace_seq_has_overflowed(s))
4453                         return TRACE_TYPE_PARTIAL_LINE;
4454         }
4455
4456         event = ftrace_find_event(entry->type);
4457         if (event) {
4458                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4459                 if (ret != TRACE_TYPE_HANDLED)
4460                         return ret;
4461         }
4462
4463         SEQ_PUT_FIELD(s, newline);
4464
4465         return trace_handle_return(s);
4466 }
4467
4468 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4469 {
4470         struct trace_array *tr = iter->tr;
4471         struct trace_seq *s = &iter->seq;
4472         struct trace_entry *entry;
4473         struct trace_event *event;
4474
4475         entry = iter->ent;
4476
4477         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4478                 SEQ_PUT_FIELD(s, entry->pid);
4479                 SEQ_PUT_FIELD(s, iter->cpu);
4480                 SEQ_PUT_FIELD(s, iter->ts);
4481                 if (trace_seq_has_overflowed(s))
4482                         return TRACE_TYPE_PARTIAL_LINE;
4483         }
4484
4485         event = ftrace_find_event(entry->type);
4486         return event ? event->funcs->binary(iter, 0, event) :
4487                 TRACE_TYPE_HANDLED;
4488 }
4489
4490 int trace_empty(struct trace_iterator *iter)
4491 {
4492         struct ring_buffer_iter *buf_iter;
4493         int cpu;
4494
4495         /* If we are looking at one CPU buffer, only check that one */
4496         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4497                 cpu = iter->cpu_file;
4498                 buf_iter = trace_buffer_iter(iter, cpu);
4499                 if (buf_iter) {
4500                         if (!ring_buffer_iter_empty(buf_iter))
4501                                 return 0;
4502                 } else {
4503                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4504                                 return 0;
4505                 }
4506                 return 1;
4507         }
4508
4509         for_each_tracing_cpu(cpu) {
4510                 buf_iter = trace_buffer_iter(iter, cpu);
4511                 if (buf_iter) {
4512                         if (!ring_buffer_iter_empty(buf_iter))
4513                                 return 0;
4514                 } else {
4515                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4516                                 return 0;
4517                 }
4518         }
4519
4520         return 1;
4521 }
4522
4523 /*  Called with trace_event_read_lock() held. */
4524 enum print_line_t print_trace_line(struct trace_iterator *iter)
4525 {
4526         struct trace_array *tr = iter->tr;
4527         unsigned long trace_flags = tr->trace_flags;
4528         enum print_line_t ret;
4529
4530         if (iter->lost_events) {
4531                 if (iter->lost_events == (unsigned long)-1)
4532                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4533                                          iter->cpu);
4534                 else
4535                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4536                                          iter->cpu, iter->lost_events);
4537                 if (trace_seq_has_overflowed(&iter->seq))
4538                         return TRACE_TYPE_PARTIAL_LINE;
4539         }
4540
4541         if (iter->trace && iter->trace->print_line) {
4542                 ret = iter->trace->print_line(iter);
4543                 if (ret != TRACE_TYPE_UNHANDLED)
4544                         return ret;
4545         }
4546
4547         if (iter->ent->type == TRACE_BPUTS &&
4548                         trace_flags & TRACE_ITER_PRINTK &&
4549                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4550                 return trace_print_bputs_msg_only(iter);
4551
4552         if (iter->ent->type == TRACE_BPRINT &&
4553                         trace_flags & TRACE_ITER_PRINTK &&
4554                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4555                 return trace_print_bprintk_msg_only(iter);
4556
4557         if (iter->ent->type == TRACE_PRINT &&
4558                         trace_flags & TRACE_ITER_PRINTK &&
4559                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4560                 return trace_print_printk_msg_only(iter);
4561
4562         if (trace_flags & TRACE_ITER_BIN)
4563                 return print_bin_fmt(iter);
4564
4565         if (trace_flags & TRACE_ITER_HEX)
4566                 return print_hex_fmt(iter);
4567
4568         if (trace_flags & TRACE_ITER_RAW)
4569                 return print_raw_fmt(iter);
4570
4571         return print_trace_fmt(iter);
4572 }
4573
4574 void trace_latency_header(struct seq_file *m)
4575 {
4576         struct trace_iterator *iter = m->private;
4577         struct trace_array *tr = iter->tr;
4578
4579         /* print nothing if the buffers are empty */
4580         if (trace_empty(iter))
4581                 return;
4582
4583         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4584                 print_trace_header(m, iter);
4585
4586         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4587                 print_lat_help_header(m);
4588 }
4589
4590 void trace_default_header(struct seq_file *m)
4591 {
4592         struct trace_iterator *iter = m->private;
4593         struct trace_array *tr = iter->tr;
4594         unsigned long trace_flags = tr->trace_flags;
4595
4596         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4597                 return;
4598
4599         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4600                 /* print nothing if the buffers are empty */
4601                 if (trace_empty(iter))
4602                         return;
4603                 print_trace_header(m, iter);
4604                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4605                         print_lat_help_header(m);
4606         } else {
4607                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4608                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4609                                 print_func_help_header_irq(iter->array_buffer,
4610                                                            m, trace_flags);
4611                         else
4612                                 print_func_help_header(iter->array_buffer, m,
4613                                                        trace_flags);
4614                 }
4615         }
4616 }
4617
4618 static void test_ftrace_alive(struct seq_file *m)
4619 {
4620         if (!ftrace_is_dead())
4621                 return;
4622         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4623                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4624 }
4625
4626 #ifdef CONFIG_TRACER_MAX_TRACE
4627 static void show_snapshot_main_help(struct seq_file *m)
4628 {
4629         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4630                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4631                     "#                      Takes a snapshot of the main buffer.\n"
4632                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4633                     "#                      (Doesn't have to be '2' works with any number that\n"
4634                     "#                       is not a '0' or '1')\n");
4635 }
4636
4637 static void show_snapshot_percpu_help(struct seq_file *m)
4638 {
4639         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4640 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4641         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4642                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4643 #else
4644         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4645                     "#                     Must use main snapshot file to allocate.\n");
4646 #endif
4647         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4648                     "#                      (Doesn't have to be '2' works with any number that\n"
4649                     "#                       is not a '0' or '1')\n");
4650 }
4651
4652 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4653 {
4654         if (iter->tr->allocated_snapshot)
4655                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4656         else
4657                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4658
4659         seq_puts(m, "# Snapshot commands:\n");
4660         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4661                 show_snapshot_main_help(m);
4662         else
4663                 show_snapshot_percpu_help(m);
4664 }
4665 #else
4666 /* Should never be called */
4667 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4668 #endif
4669
4670 static int s_show(struct seq_file *m, void *v)
4671 {
4672         struct trace_iterator *iter = v;
4673         int ret;
4674
4675         if (iter->ent == NULL) {
4676                 if (iter->tr) {
4677                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4678                         seq_puts(m, "#\n");
4679                         test_ftrace_alive(m);
4680                 }
4681                 if (iter->snapshot && trace_empty(iter))
4682                         print_snapshot_help(m, iter);
4683                 else if (iter->trace && iter->trace->print_header)
4684                         iter->trace->print_header(m);
4685                 else
4686                         trace_default_header(m);
4687
4688         } else if (iter->leftover) {
4689                 /*
4690                  * If we filled the seq_file buffer earlier, we
4691                  * want to just show it now.
4692                  */
4693                 ret = trace_print_seq(m, &iter->seq);
4694
4695                 /* ret should this time be zero, but you never know */
4696                 iter->leftover = ret;
4697
4698         } else {
4699                 print_trace_line(iter);
4700                 ret = trace_print_seq(m, &iter->seq);
4701                 /*
4702                  * If we overflow the seq_file buffer, then it will
4703                  * ask us for this data again at start up.
4704                  * Use that instead.
4705                  *  ret is 0 if seq_file write succeeded.
4706                  *        -1 otherwise.
4707                  */
4708                 iter->leftover = ret;
4709         }
4710
4711         return 0;
4712 }
4713
4714 /*
4715  * Should be used after trace_array_get(), trace_types_lock
4716  * ensures that i_cdev was already initialized.
4717  */
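/*
 * Per-CPU trace files store their CPU number in i_cdev as (cpu + 1) when
 * they are created (see trace_create_cpu_file()), so a NULL i_cdev means
 * this is a top-level file that covers all CPUs.
 */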
4718 static inline int tracing_get_cpu(struct inode *inode)
4719 {
4720         if (inode->i_cdev) /* See trace_create_cpu_file() */
4721                 return (long)inode->i_cdev - 1;
4722         return RING_BUFFER_ALL_CPUS;
4723 }
4724
4725 static const struct seq_operations tracer_seq_ops = {
4726         .start          = s_start,
4727         .next           = s_next,
4728         .stop           = s_stop,
4729         .show           = s_show,
4730 };
4731
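/*
 * Set up the trace_iterator backing the seq_file interface of the "trace"
 * (and, with @snapshot set, "snapshot") files.  If the pause-on-trace
 * option is set, tracing is stopped here while the file is open and
 * restarted in tracing_release().
 */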
4732 static struct trace_iterator *
4733 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4734 {
4735         struct trace_array *tr = inode->i_private;
4736         struct trace_iterator *iter;
4737         int cpu;
4738
4739         if (tracing_disabled)
4740                 return ERR_PTR(-ENODEV);
4741
4742         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4743         if (!iter)
4744                 return ERR_PTR(-ENOMEM);
4745
4746         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4747                                     GFP_KERNEL);
4748         if (!iter->buffer_iter)
4749                 goto release;
4750
4751         /*
4752          * trace_find_next_entry() may need to save off iter->ent.
4753          * It will place it into the iter->temp buffer. As most
4754          * events are less than 128 bytes, allocate a buffer of that size.
4755          * If one is greater, then trace_find_next_entry() will
4756          * allocate a new buffer to adjust for the bigger iter->ent.
4757          * It's not critical if it fails to get allocated here.
4758          */
4759         iter->temp = kmalloc(128, GFP_KERNEL);
4760         if (iter->temp)
4761                 iter->temp_size = 128;
4762
4763         /*
4764          * trace_event_printf() may need to modify given format
4765          * string to replace %p with %px so that it shows real address
4766          * instead of hash value. However, that is only needed for event
4767          * tracing; other tracers may not need it. Defer the allocation
4768          * until it is needed.
4769          */
4770         iter->fmt = NULL;
4771         iter->fmt_size = 0;
4772
4773         /*
4774          * We make a copy of the current tracer to avoid concurrent
4775          * changes on it while we are reading.
4776          */
4777         mutex_lock(&trace_types_lock);
4778         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4779         if (!iter->trace)
4780                 goto fail;
4781
4782         *iter->trace = *tr->current_trace;
4783
4784         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4785                 goto fail;
4786
4787         iter->tr = tr;
4788
4789 #ifdef CONFIG_TRACER_MAX_TRACE
4790         /* Currently only the top directory has a snapshot */
4791         if (tr->current_trace->print_max || snapshot)
4792                 iter->array_buffer = &tr->max_buffer;
4793         else
4794 #endif
4795                 iter->array_buffer = &tr->array_buffer;
4796         iter->snapshot = snapshot;
4797         iter->pos = -1;
4798         iter->cpu_file = tracing_get_cpu(inode);
4799         mutex_init(&iter->mutex);
4800
4801         /* Notify the tracer early; before we stop tracing. */
4802         if (iter->trace->open)
4803                 iter->trace->open(iter);
4804
4805         /* Annotate start of buffers if we had overruns */
4806         if (ring_buffer_overruns(iter->array_buffer->buffer))
4807                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4808
4809         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4810         if (trace_clocks[tr->clock_id].in_ns)
4811                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4812
4813         /*
4814          * If pause-on-trace is enabled, then stop the trace while
4815          * dumping, unless this is the "snapshot" file
4816          */
4817         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4818                 tracing_stop_tr(tr);
4819
4820         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4821                 for_each_tracing_cpu(cpu) {
4822                         iter->buffer_iter[cpu] =
4823                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4824                                                          cpu, GFP_KERNEL);
4825                 }
4826                 ring_buffer_read_prepare_sync();
4827                 for_each_tracing_cpu(cpu) {
4828                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4829                         tracing_iter_reset(iter, cpu);
4830                 }
4831         } else {
4832                 cpu = iter->cpu_file;
4833                 iter->buffer_iter[cpu] =
4834                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4835                                                  cpu, GFP_KERNEL);
4836                 ring_buffer_read_prepare_sync();
4837                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4838                 tracing_iter_reset(iter, cpu);
4839         }
4840
4841         mutex_unlock(&trace_types_lock);
4842
4843         return iter;
4844
4845  fail:
4846         mutex_unlock(&trace_types_lock);
4847         kfree(iter->trace);
4848         kfree(iter->temp);
4849         kfree(iter->buffer_iter);
4850 release:
4851         seq_release_private(inode, file);
4852         return ERR_PTR(-ENOMEM);
4853 }
4854
4855 int tracing_open_generic(struct inode *inode, struct file *filp)
4856 {
4857         int ret;
4858
4859         ret = tracing_check_open_get_tr(NULL);
4860         if (ret)
4861                 return ret;
4862
4863         filp->private_data = inode->i_private;
4864         return 0;
4865 }
4866
4867 bool tracing_is_disabled(void)
4868 {
4869         return tracing_disabled ? true : false;
4870 }
4871
4872 /*
4873  * Open and update trace_array ref count.
4874  * Must have the current trace_array passed to it.
4875  */
4876 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4877 {
4878         struct trace_array *tr = inode->i_private;
4879         int ret;
4880
4881         ret = tracing_check_open_get_tr(tr);
4882         if (ret)
4883                 return ret;
4884
4885         filp->private_data = inode->i_private;
4886
4887         return 0;
4888 }
4889
4890 /*
4891  * The private pointer of the inode is the trace_event_file.
4892  * Update the tr ref count associated with it.
4893  */
4894 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4895 {
4896         struct trace_event_file *file = inode->i_private;
4897         int ret;
4898
4899         ret = tracing_check_open_get_tr(file->tr);
4900         if (ret)
4901                 return ret;
4902
4903         filp->private_data = inode->i_private;
4904
4905         return 0;
4906 }
4907
4908 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4909 {
4910         struct trace_event_file *file = inode->i_private;
4911
4912         trace_array_put(file->tr);
4913
4914         return 0;
4915 }
4916
4917 static int tracing_mark_open(struct inode *inode, struct file *filp)
4918 {
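        /* trace_marker writes are a byte stream; stream_open() sets up a positionless fd */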
4919         stream_open(inode, filp);
4920         return tracing_open_generic_tr(inode, filp);
4921 }
4922
4923 static int tracing_release(struct inode *inode, struct file *file)
4924 {
4925         struct trace_array *tr = inode->i_private;
4926         struct seq_file *m = file->private_data;
4927         struct trace_iterator *iter;
4928         int cpu;
4929
4930         if (!(file->f_mode & FMODE_READ)) {
4931                 trace_array_put(tr);
4932                 return 0;
4933         }
4934
4935         /* Writes do not use seq_file */
4936         iter = m->private;
4937         mutex_lock(&trace_types_lock);
4938
4939         for_each_tracing_cpu(cpu) {
4940                 if (iter->buffer_iter[cpu])
4941                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4942         }
4943
4944         if (iter->trace && iter->trace->close)
4945                 iter->trace->close(iter);
4946
4947         if (!iter->snapshot && tr->stop_count)
4948                 /* reenable tracing if it was previously enabled */
4949                 tracing_start_tr(tr);
4950
4951         __trace_array_put(tr);
4952
4953         mutex_unlock(&trace_types_lock);
4954
4955         mutex_destroy(&iter->mutex);
4956         free_cpumask_var(iter->started);
4957         kfree(iter->fmt);
4958         kfree(iter->temp);
4959         kfree(iter->trace);
4960         kfree(iter->buffer_iter);
4961         seq_release_private(inode, file);
4962
4963         return 0;
4964 }
4965
4966 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4967 {
4968         struct trace_array *tr = inode->i_private;
4969
4970         trace_array_put(tr);
4971         return 0;
4972 }
4973
4974 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4975 {
4976         struct trace_array *tr = inode->i_private;
4977
4978         trace_array_put(tr);
4979
4980         return single_release(inode, file);
4981 }
4982
4983 static int tracing_open(struct inode *inode, struct file *file)
4984 {
4985         struct trace_array *tr = inode->i_private;
4986         struct trace_iterator *iter;
4987         int ret;
4988
4989         ret = tracing_check_open_get_tr(tr);
4990         if (ret)
4991                 return ret;
4992
4993         /* If this file was open for write, then erase contents */
4994         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4995                 int cpu = tracing_get_cpu(inode);
4996                 struct array_buffer *trace_buf = &tr->array_buffer;
4997
4998 #ifdef CONFIG_TRACER_MAX_TRACE
4999                 if (tr->current_trace->print_max)
5000                         trace_buf = &tr->max_buffer;
5001 #endif
5002
5003                 if (cpu == RING_BUFFER_ALL_CPUS)
5004                         tracing_reset_online_cpus(trace_buf);
5005                 else
5006                         tracing_reset_cpu(trace_buf, cpu);
5007         }
5008
5009         if (file->f_mode & FMODE_READ) {
5010                 iter = __tracing_open(inode, file, false);
5011                 if (IS_ERR(iter))
5012                         ret = PTR_ERR(iter);
5013                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5014                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5015         }
5016
5017         if (ret < 0)
5018                 trace_array_put(tr);
5019
5020         return ret;
5021 }
5022
5023 /*
5024  * Some tracers are not suitable for instance buffers.
5025  * A tracer is always available for the global array (toplevel)
5026  * or if it explicitly states that it is.
5027  */
5028 static bool
5029 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5030 {
5031         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5032 }
5033
5034 /* Find the next tracer that this trace array may use */
5035 static struct tracer *
5036 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5037 {
5038         while (t && !trace_ok_for_array(t, tr))
5039                 t = t->next;
5040
5041         return t;
5042 }
5043
5044 static void *
5045 t_next(struct seq_file *m, void *v, loff_t *pos)
5046 {
5047         struct trace_array *tr = m->private;
5048         struct tracer *t = v;
5049
5050         (*pos)++;
5051
5052         if (t)
5053                 t = get_tracer_for_array(tr, t->next);
5054
5055         return t;
5056 }
5057
5058 static void *t_start(struct seq_file *m, loff_t *pos)
5059 {
5060         struct trace_array *tr = m->private;
5061         struct tracer *t;
5062         loff_t l = 0;
5063
5064         mutex_lock(&trace_types_lock);
5065
5066         t = get_tracer_for_array(tr, trace_types);
5067         for (; t && l < *pos; t = t_next(m, t, &l))
5068                         ;
5069
5070         return t;
5071 }
5072
5073 static void t_stop(struct seq_file *m, void *p)
5074 {
5075         mutex_unlock(&trace_types_lock);
5076 }
5077
5078 static int t_show(struct seq_file *m, void *v)
5079 {
5080         struct tracer *t = v;
5081
5082         if (!t)
5083                 return 0;
5084
5085         seq_puts(m, t->name);
5086         if (t->next)
5087                 seq_putc(m, ' ');
5088         else
5089                 seq_putc(m, '\n');
5090
5091         return 0;
5092 }
5093
5094 static const struct seq_operations show_traces_seq_ops = {
5095         .start          = t_start,
5096         .next           = t_next,
5097         .stop           = t_stop,
5098         .show           = t_show,
5099 };
5100
5101 static int show_traces_open(struct inode *inode, struct file *file)
5102 {
5103         struct trace_array *tr = inode->i_private;
5104         struct seq_file *m;
5105         int ret;
5106
5107         ret = tracing_check_open_get_tr(tr);
5108         if (ret)
5109                 return ret;
5110
5111         ret = seq_open(file, &show_traces_seq_ops);
5112         if (ret) {
5113                 trace_array_put(tr);
5114                 return ret;
5115         }
5116
5117         m = file->private_data;
5118         m->private = tr;
5119
5120         return 0;
5121 }
5122
5123 static int show_traces_release(struct inode *inode, struct file *file)
5124 {
5125         struct trace_array *tr = inode->i_private;
5126
5127         trace_array_put(tr);
5128         return seq_release(inode, file);
5129 }
5130
5131 static ssize_t
5132 tracing_write_stub(struct file *filp, const char __user *ubuf,
5133                    size_t count, loff_t *ppos)
5134 {
5135         return count;
5136 }
5137
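/*
 * Readers go through seq_file and can seek normally.  A write-only open
 * (typically used just to clear the buffer via O_TRUNC) has nothing to
 * seek over, so simply reset the position to zero.
 */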
5138 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5139 {
5140         int ret;
5141
5142         if (file->f_mode & FMODE_READ)
5143                 ret = seq_lseek(file, offset, whence);
5144         else
5145                 file->f_pos = ret = 0;
5146
5147         return ret;
5148 }
5149
5150 static const struct file_operations tracing_fops = {
5151         .open           = tracing_open,
5152         .read           = seq_read,
5153         .read_iter      = seq_read_iter,
5154         .splice_read    = generic_file_splice_read,
5155         .write          = tracing_write_stub,
5156         .llseek         = tracing_lseek,
5157         .release        = tracing_release,
5158 };
5159
5160 static const struct file_operations show_traces_fops = {
5161         .open           = show_traces_open,
5162         .read           = seq_read,
5163         .llseek         = seq_lseek,
5164         .release        = show_traces_release,
5165 };
5166
5167 static ssize_t
5168 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5169                      size_t count, loff_t *ppos)
5170 {
5171         struct trace_array *tr = file_inode(filp)->i_private;
5172         char *mask_str;
5173         int len;
5174
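        /* A first pass with a NULL buffer just computes the length needed (plus the NUL) */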
5175         len = snprintf(NULL, 0, "%*pb\n",
5176                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5177         mask_str = kmalloc(len, GFP_KERNEL);
5178         if (!mask_str)
5179                 return -ENOMEM;
5180
5181         len = snprintf(mask_str, len, "%*pb\n",
5182                        cpumask_pr_args(tr->tracing_cpumask));
5183         if (len >= count) {
5184                 count = -EINVAL;
5185                 goto out_err;
5186         }
5187         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5188
5189 out_err:
5190         kfree(mask_str);
5191
5192         return count;
5193 }
5194
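/*
 * Apply a new tracing cpumask: CPUs leaving the mask have their per-CPU
 * ring buffers disabled (and their "disabled" counters bumped), CPUs
 * entering the mask are re-enabled.  From user space this is driven via
 * the tracing_cpumask file, which takes the usual hex cpumask format,
 * e.g. (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace only CPUs 0 and 1
 */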
5195 int tracing_set_cpumask(struct trace_array *tr,
5196                         cpumask_var_t tracing_cpumask_new)
5197 {
5198         int cpu;
5199
5200         if (!tr)
5201                 return -EINVAL;
5202
5203         local_irq_disable();
5204         arch_spin_lock(&tr->max_lock);
5205         for_each_tracing_cpu(cpu) {
5206                 /*
5207                  * Increase/decrease the disabled counter if we are
5208                  * about to flip a bit in the cpumask:
5209                  */
5210                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5211                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5212                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5213                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5214 #ifdef CONFIG_TRACER_MAX_TRACE
5215                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5216 #endif
5217                 }
5218                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5219                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5220                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5221                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5222 #ifdef CONFIG_TRACER_MAX_TRACE
5223                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5224 #endif
5225                 }
5226         }
5227         arch_spin_unlock(&tr->max_lock);
5228         local_irq_enable();
5229
5230         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5231
5232         return 0;
5233 }
5234
5235 static ssize_t
5236 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5237                       size_t count, loff_t *ppos)
5238 {
5239         struct trace_array *tr = file_inode(filp)->i_private;
5240         cpumask_var_t tracing_cpumask_new;
5241         int err;
5242
5243         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5244                 return -ENOMEM;
5245
5246         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5247         if (err)
5248                 goto err_free;
5249
5250         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5251         if (err)
5252                 goto err_free;
5253
5254         free_cpumask_var(tracing_cpumask_new);
5255
5256         return count;
5257
5258 err_free:
5259         free_cpumask_var(tracing_cpumask_new);
5260
5261         return err;
5262 }
5263
5264 static const struct file_operations tracing_cpumask_fops = {
5265         .open           = tracing_open_generic_tr,
5266         .read           = tracing_cpumask_read,
5267         .write          = tracing_cpumask_write,
5268         .release        = tracing_release_generic_tr,
5269         .llseek         = generic_file_llseek,
5270 };
5271
5272 static int tracing_trace_options_show(struct seq_file *m, void *v)
5273 {
5274         struct tracer_opt *trace_opts;
5275         struct trace_array *tr = m->private;
5276         u32 tracer_flags;
5277         int i;
5278
5279         mutex_lock(&trace_types_lock);
5280         tracer_flags = tr->current_trace->flags->val;
5281         trace_opts = tr->current_trace->flags->opts;
5282
5283         for (i = 0; trace_options[i]; i++) {
5284                 if (tr->trace_flags & (1 << i))
5285                         seq_printf(m, "%s\n", trace_options[i]);
5286                 else
5287                         seq_printf(m, "no%s\n", trace_options[i]);
5288         }
5289
5290         for (i = 0; trace_opts[i].name; i++) {
5291                 if (tracer_flags & trace_opts[i].bit)
5292                         seq_printf(m, "%s\n", trace_opts[i].name);
5293                 else
5294                         seq_printf(m, "no%s\n", trace_opts[i].name);
5295         }
5296         mutex_unlock(&trace_types_lock);
5297
5298         return 0;
5299 }
5300
5301 static int __set_tracer_option(struct trace_array *tr,
5302                                struct tracer_flags *tracer_flags,
5303                                struct tracer_opt *opts, int neg)
5304 {
5305         struct tracer *trace = tracer_flags->trace;
5306         int ret;
5307
5308         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5309         if (ret)
5310                 return ret;
5311
5312         if (neg)
5313                 tracer_flags->val &= ~opts->bit;
5314         else
5315                 tracer_flags->val |= opts->bit;
5316         return 0;
5317 }
5318
5319 /* Try to assign a tracer specific option */
5320 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5321 {
5322         struct tracer *trace = tr->current_trace;
5323         struct tracer_flags *tracer_flags = trace->flags;
5324         struct tracer_opt *opts = NULL;
5325         int i;
5326
5327         for (i = 0; tracer_flags->opts[i].name; i++) {
5328                 opts = &tracer_flags->opts[i];
5329
5330                 if (strcmp(cmp, opts->name) == 0)
5331                         return __set_tracer_option(tr, trace->flags, opts, neg);
5332         }
5333
5334         return -EINVAL;
5335 }
5336
5337 /* Some tracers require overwrite to stay enabled */
5338 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5339 {
5340         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5341                 return -1;
5342
5343         return 0;
5344 }
5345
5346 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5347 {
5348         int *map;
5349
5350         if ((mask == TRACE_ITER_RECORD_TGID) ||
5351             (mask == TRACE_ITER_RECORD_CMD))
5352                 lockdep_assert_held(&event_mutex);
5353
5354         /* do nothing if the flag is already in the requested state */
5355         if (!!(tr->trace_flags & mask) == !!enabled)
5356                 return 0;
5357
5358         /* Give the tracer a chance to approve the change */
5359         if (tr->current_trace->flag_changed)
5360                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5361                         return -EINVAL;
5362
5363         if (enabled)
5364                 tr->trace_flags |= mask;
5365         else
5366                 tr->trace_flags &= ~mask;
5367
5368         if (mask == TRACE_ITER_RECORD_CMD)
5369                 trace_event_enable_cmd_record(enabled);
5370
5371         if (mask == TRACE_ITER_RECORD_TGID) {
5372                 if (!tgid_map) {
5373                         tgid_map_max = pid_max;
5374                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5375                                        GFP_KERNEL);
5376
5377                         /*
5378                          * Pairs with smp_load_acquire() in
5379                          * trace_find_tgid_ptr() to ensure that if it observes
5380                          * the tgid_map we just allocated then it also observes
5381                          * the corresponding tgid_map_max value.
5382                          */
5383                         smp_store_release(&tgid_map, map);
5384                 }
5385                 if (!tgid_map) {
5386                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5387                         return -ENOMEM;
5388                 }
5389
5390                 trace_event_enable_tgid_record(enabled);
5391         }
5392
5393         if (mask == TRACE_ITER_EVENT_FORK)
5394                 trace_event_follow_fork(tr, enabled);
5395
5396         if (mask == TRACE_ITER_FUNC_FORK)
5397                 ftrace_pid_follow_fork(tr, enabled);
5398
5399         if (mask == TRACE_ITER_OVERWRITE) {
5400                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5401 #ifdef CONFIG_TRACER_MAX_TRACE
5402                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5403 #endif
5404         }
5405
5406         if (mask == TRACE_ITER_PRINTK) {
5407                 trace_printk_start_stop_comm(enabled);
5408                 trace_printk_control(enabled);
5409         }
5410
5411         return 0;
5412 }
5413
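/*
 * Parse and apply a single option token as written to trace_options,
 * e.g. "print-parent" to set a flag or "noprint-parent" to clear it.
 * Names that do not match a core trace option are tried as
 * tracer-specific options before giving up.
 */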
5414 int trace_set_options(struct trace_array *tr, char *option)
5415 {
5416         char *cmp;
5417         int neg = 0;
5418         int ret;
5419         size_t orig_len = strlen(option);
5420         int len;
5421
5422         cmp = strstrip(option);
5423
5424         len = str_has_prefix(cmp, "no");
5425         if (len)
5426                 neg = 1;
5427
5428         cmp += len;
5429
5430         mutex_lock(&event_mutex);
5431         mutex_lock(&trace_types_lock);
5432
5433         ret = match_string(trace_options, -1, cmp);
5434         /* If no option could be set, test the specific tracer options */
5435         if (ret < 0)
5436                 ret = set_tracer_option(tr, cmp, neg);
5437         else
5438                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5439
5440         mutex_unlock(&trace_types_lock);
5441         mutex_unlock(&event_mutex);
5442
5443         /*
5444          * If the first trailing whitespace is replaced with '\0' by strstrip,
5445          * turn it back into a space.
5446          */
5447         if (orig_len > strlen(option))
5448                 option[strlen(option)] = ' ';
5449
5450         return ret;
5451 }
5452
5453 static void __init apply_trace_boot_options(void)
5454 {
5455         char *buf = trace_boot_options_buf;
5456         char *option;
5457
5458         while (true) {
5459                 option = strsep(&buf, ",");
5460
5461                 if (!option)
5462                         break;
5463
5464                 if (*option)
5465                         trace_set_options(&global_trace, option);
5466
5467                 /* Put back the comma to allow this to be called again */
5468                 if (buf)
5469                         *(buf - 1) = ',';
5470         }
5471 }
5472
5473 static ssize_t
5474 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5475                         size_t cnt, loff_t *ppos)
5476 {
5477         struct seq_file *m = filp->private_data;
5478         struct trace_array *tr = m->private;
5479         char buf[64];
5480         int ret;
5481
5482         if (cnt >= sizeof(buf))
5483                 return -EINVAL;
5484
5485         if (copy_from_user(buf, ubuf, cnt))
5486                 return -EFAULT;
5487
5488         buf[cnt] = 0;
5489
5490         ret = trace_set_options(tr, buf);
5491         if (ret < 0)
5492                 return ret;
5493
5494         *ppos += cnt;
5495
5496         return cnt;
5497 }
5498
5499 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5500 {
5501         struct trace_array *tr = inode->i_private;
5502         int ret;
5503
5504         ret = tracing_check_open_get_tr(tr);
5505         if (ret)
5506                 return ret;
5507
5508         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5509         if (ret < 0)
5510                 trace_array_put(tr);
5511
5512         return ret;
5513 }
5514
5515 static const struct file_operations tracing_iter_fops = {
5516         .open           = tracing_trace_options_open,
5517         .read           = seq_read,
5518         .llseek         = seq_lseek,
5519         .release        = tracing_single_release_tr,
5520         .write          = tracing_trace_options_write,
5521 };
5522
5523 static const char readme_msg[] =
5524         "tracing mini-HOWTO:\n\n"
5525         "# echo 0 > tracing_on : quick way to disable tracing\n"
5526         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5527         " Important files:\n"
5528         "  trace\t\t\t- The static contents of the buffer\n"
5529         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5530         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5531         "  current_tracer\t- function and latency tracers\n"
5532         "  available_tracers\t- list of configured tracers for current_tracer\n"
5533         "  error_log\t- error log for failed commands (that support it)\n"
5534         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5535         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5536         "  trace_clock\t\t- change the clock used to order events\n"
5537         "       local:   Per cpu clock but may not be synced across CPUs\n"
5538         "      global:   Synced across CPUs but slows tracing down.\n"
5539         "     counter:   Not a clock, but just an increment\n"
5540         "      uptime:   Jiffy counter from time of boot\n"
5541         "        perf:   Same clock that perf events use\n"
5542 #ifdef CONFIG_X86_64
5543         "     x86-tsc:   TSC cycle counter\n"
5544 #endif
5545         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5546         "       delta:   Delta difference against a buffer-wide timestamp\n"
5547         "    absolute:   Absolute (standalone) timestamp\n"
5548         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5549         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
5550         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5551         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5552         "\t\t\t  Remove sub-buffer with rmdir\n"
5553         "  trace_options\t\t- Set format or modify how tracing happens\n"
5554         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5555         "\t\t\t  option name\n"
5556         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5557 #ifdef CONFIG_DYNAMIC_FTRACE
5558         "\n  available_filter_functions - list of functions that can be filtered on\n"
5559         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5560         "\t\t\t  functions\n"
5561         "\t     accepts: func_full_name or glob-matching-pattern\n"
5562         "\t     modules: Can select a group via module\n"
5563         "\t      Format: :mod:<module-name>\n"
5564         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5565         "\t    triggers: a command to perform when function is hit\n"
5566         "\t      Format: <function>:<trigger>[:count]\n"
5567         "\t     trigger: traceon, traceoff\n"
5568         "\t\t      enable_event:<system>:<event>\n"
5569         "\t\t      disable_event:<system>:<event>\n"
5570 #ifdef CONFIG_STACKTRACE
5571         "\t\t      stacktrace\n"
5572 #endif
5573 #ifdef CONFIG_TRACER_SNAPSHOT
5574         "\t\t      snapshot\n"
5575 #endif
5576         "\t\t      dump\n"
5577         "\t\t      cpudump\n"
5578         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5579         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5580         "\t     The first one will disable tracing every time do_fault is hit\n"
5581         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5582         "\t       The first time do_trap is hit and it disables tracing, the\n"
5583         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5584         "\t       the counter will not decrement. It only decrements when the\n"
5585         "\t       trigger did work\n"
5586         "\t     To remove trigger without count:\n"
5587         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5588         "\t     To remove trigger with a count:\n"
5589         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5590         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5591         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5592         "\t    modules: Can select a group via module command :mod:\n"
5593         "\t    Does not accept triggers\n"
5594 #endif /* CONFIG_DYNAMIC_FTRACE */
5595 #ifdef CONFIG_FUNCTION_TRACER
5596         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5597         "\t\t    (function)\n"
5598         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5599         "\t\t    (function)\n"
5600 #endif
5601 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5602         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5603         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5604         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5605 #endif
5606 #ifdef CONFIG_TRACER_SNAPSHOT
5607         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5608         "\t\t\t  snapshot buffer. Read the contents for more\n"
5609         "\t\t\t  information\n"
5610 #endif
5611 #ifdef CONFIG_STACK_TRACER
5612         "  stack_trace\t\t- Shows the max stack trace when active\n"
5613         "  stack_max_size\t- Shows current max stack size that was traced\n"
5614         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5615         "\t\t\t  new trace)\n"
5616 #ifdef CONFIG_DYNAMIC_FTRACE
5617         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5618         "\t\t\t  traces\n"
5619 #endif
5620 #endif /* CONFIG_STACK_TRACER */
5621 #ifdef CONFIG_DYNAMIC_EVENTS
5622         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5623         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5624 #endif
5625 #ifdef CONFIG_KPROBE_EVENTS
5626         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5627         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5628 #endif
5629 #ifdef CONFIG_UPROBE_EVENTS
5630         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5631         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5632 #endif
5633 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5634         "\t  accepts: event-definitions (one definition per line)\n"
5635         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5636         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5637 #ifdef CONFIG_HIST_TRIGGERS
5638         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5639 #endif
5640         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5641         "\t           -:[<group>/]<event>\n"
5642 #ifdef CONFIG_KPROBE_EVENTS
5643         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5644   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5645 #endif
5646 #ifdef CONFIG_UPROBE_EVENTS
5647   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5648 #endif
5649         "\t     args: <name>=fetcharg[:type]\n"
5650         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5651 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5652         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5653 #else
5654         "\t           $stack<index>, $stack, $retval, $comm,\n"
5655 #endif
5656         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5657         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5658         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5659         "\t           symstr, <type>\\[<array-size>\\]\n"
5660 #ifdef CONFIG_HIST_TRIGGERS
5661         "\t    field: <stype> <name>;\n"
5662         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5663         "\t           [unsigned] char/int/long\n"
5664 #endif
5665         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5666         "\t            of the <attached-group>/<attached-event>.\n"
5667 #endif
5668         "  events/\t\t- Directory containing all trace event subsystems:\n"
5669         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5670         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5671         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5672         "\t\t\t  events\n"
5673         "      filter\t\t- If set, only events passing filter are traced\n"
5674         "  events/<system>/<event>/\t- Directory containing control files for\n"
5675         "\t\t\t  <event>:\n"
5676         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5677         "      filter\t\t- If set, only events passing filter are traced\n"
5678         "      trigger\t\t- If set, a command to perform when event is hit\n"
5679         "\t    Format: <trigger>[:count][if <filter>]\n"
5680         "\t   trigger: traceon, traceoff\n"
5681         "\t            enable_event:<system>:<event>\n"
5682         "\t            disable_event:<system>:<event>\n"
5683 #ifdef CONFIG_HIST_TRIGGERS
5684         "\t            enable_hist:<system>:<event>\n"
5685         "\t            disable_hist:<system>:<event>\n"
5686 #endif
5687 #ifdef CONFIG_STACKTRACE
5688         "\t\t    stacktrace\n"
5689 #endif
5690 #ifdef CONFIG_TRACER_SNAPSHOT
5691         "\t\t    snapshot\n"
5692 #endif
5693 #ifdef CONFIG_HIST_TRIGGERS
5694         "\t\t    hist (see below)\n"
5695 #endif
5696         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5697         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5698         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5699         "\t                  events/block/block_unplug/trigger\n"
5700         "\t   The first disables tracing every time block_unplug is hit.\n"
5701         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5702         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5703         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5704         "\t   Like function triggers, the counter is only decremented if it\n"
5705         "\t    enabled or disabled tracing.\n"
5706         "\t   To remove a trigger without a count:\n"
5707         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5708         "\t   To remove a trigger with a count:\n"
5709         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5710         "\t   Filters can be ignored when removing a trigger.\n"
5711 #ifdef CONFIG_HIST_TRIGGERS
5712         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5713         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5714         "\t            [:values=<field1[,field2,...]>]\n"
5715         "\t            [:sort=<field1[,field2,...]>]\n"
5716         "\t            [:size=#entries]\n"
5717         "\t            [:pause][:continue][:clear]\n"
5718         "\t            [:name=histname1]\n"
5719         "\t            [:<handler>.<action>]\n"
5720         "\t            [if <filter>]\n\n"
5721         "\t    Note, special fields can be used as well:\n"
5722         "\t            common_timestamp - to record current timestamp\n"
5723         "\t            common_cpu - to record the CPU the event happened on\n"
5724         "\n"
5725         "\t    When a matching event is hit, an entry is added to a hash\n"
5726         "\t    table using the key(s) and value(s) named, and the value of a\n"
5727         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5728         "\t    correspond to fields in the event's format description.  Keys\n"
5729         "\t    can be any field, or the special string 'stacktrace'.\n"
5730         "\t    Compound keys consisting of up to two fields can be specified\n"
5731         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5732         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5733         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5734         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5735         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5736         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5737         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5738         "\t    its histogram data will be shared with other triggers of the\n"
5739         "\t    same name, and trigger hits will update this common data.\n\n"
5740         "\t    Reading the 'hist' file for the event will dump the hash\n"
5741         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5742         "\t    triggers attached to an event, there will be a table for each\n"
5743         "\t    trigger in the output.  The table displayed for a named\n"
5744         "\t    trigger will be the same as any other instance having the\n"
5745         "\t    same name.  The default format used to display a given field\n"
5746         "\t    can be modified by appending any of the following modifiers\n"
5747         "\t    to the field name, as applicable:\n\n"
5748         "\t            .hex        display a number as a hex value\n"
5749         "\t            .sym        display an address as a symbol\n"
5750         "\t            .sym-offset display an address as a symbol and offset\n"
5751         "\t            .execname   display a common_pid as a program name\n"
5752         "\t            .syscall    display a syscall id as a syscall name\n"
5753         "\t            .log2       display log2 value rather than raw number\n"
5754         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5755         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5756         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5757         "\t    trigger or to start a hist trigger but not log any events\n"
5758         "\t    until told to do so.  'continue' can be used to start or\n"
5759         "\t    restart a paused hist trigger.\n\n"
5760         "\t    The 'clear' parameter will clear the contents of a running\n"
5761         "\t    hist trigger and leave its current paused/active state\n"
5762         "\t    unchanged.\n\n"
5763         "\t    The enable_hist and disable_hist triggers can be used to\n"
5764         "\t    have one event conditionally start and stop another event's\n"
5765         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5766         "\t    the enable_event and disable_event triggers.\n\n"
5767         "\t    Hist trigger handlers and actions are executed whenever a\n"
5768         "\t    histogram entry is added or updated.  They take the form:\n\n"
5769         "\t        <handler>.<action>\n\n"
5770         "\t    The available handlers are:\n\n"
5771         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5772         "\t        onmax(var)               - invoke if var exceeds current max\n"
5773         "\t        onchange(var)            - invoke action if var changes\n\n"
5774         "\t    The available actions are:\n\n"
5775         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5776         "\t        save(field,...)                      - save current event fields\n"
5777 #ifdef CONFIG_TRACER_SNAPSHOT
5778         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5779 #endif
5780 #ifdef CONFIG_SYNTH_EVENTS
5781         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5782         "\t  Write into this file to define/undefine new synthetic events.\n"
5783         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5784 #endif
5785 #endif
5786 ;
5787
5788 static ssize_t
5789 tracing_readme_read(struct file *filp, char __user *ubuf,
5790                        size_t cnt, loff_t *ppos)
5791 {
5792         return simple_read_from_buffer(ubuf, cnt, ppos,
5793                                         readme_msg, strlen(readme_msg));
5794 }
5795
5796 static const struct file_operations tracing_readme_fops = {
5797         .open           = tracing_open_generic,
5798         .read           = tracing_readme_read,
5799         .llseek         = generic_file_llseek,
5800 };
5801
5802 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5803 {
5804         int pid = ++(*pos);
5805
5806         return trace_find_tgid_ptr(pid);
5807 }
5808
5809 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5810 {
5811         int pid = *pos;
5812
5813         return trace_find_tgid_ptr(pid);
5814 }
5815
5816 static void saved_tgids_stop(struct seq_file *m, void *v)
5817 {
5818 }
5819
5820 static int saved_tgids_show(struct seq_file *m, void *v)
5821 {
5822         int *entry = (int *)v;
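        /* tgid_map is indexed by pid, so the entry's offset from the base is the pid */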
5823         int pid = entry - tgid_map;
5824         int tgid = *entry;
5825
5826         if (tgid == 0)
5827                 return SEQ_SKIP;
5828
5829         seq_printf(m, "%d %d\n", pid, tgid);
5830         return 0;
5831 }
5832
5833 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5834         .start          = saved_tgids_start,
5835         .stop           = saved_tgids_stop,
5836         .next           = saved_tgids_next,
5837         .show           = saved_tgids_show,
5838 };
5839
5840 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5841 {
5842         int ret;
5843
5844         ret = tracing_check_open_get_tr(NULL);
5845         if (ret)
5846                 return ret;
5847
5848         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5849 }
5850
5851
5852 static const struct file_operations tracing_saved_tgids_fops = {
5853         .open           = tracing_saved_tgids_open,
5854         .read           = seq_read,
5855         .llseek         = seq_lseek,
5856         .release        = seq_release,
5857 };
5858
5859 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5860 {
5861         unsigned int *ptr = v;
5862
5863         if (*pos || m->count)
5864                 ptr++;
5865
5866         (*pos)++;
5867
5868         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5869              ptr++) {
5870                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5871                         continue;
5872
5873                 return ptr;
5874         }
5875
5876         return NULL;
5877 }
5878
5879 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5880 {
5881         void *v;
5882         loff_t l = 0;
5883
5884         preempt_disable();
5885         arch_spin_lock(&trace_cmdline_lock);
5886
5887         v = &savedcmd->map_cmdline_to_pid[0];
5888         while (l <= *pos) {
5889                 v = saved_cmdlines_next(m, v, &l);
5890                 if (!v)
5891                         return NULL;
5892         }
5893
5894         return v;
5895 }
5896
5897 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5898 {
5899         arch_spin_unlock(&trace_cmdline_lock);
5900         preempt_enable();
5901 }
5902
5903 static int saved_cmdlines_show(struct seq_file *m, void *v)
5904 {
5905         char buf[TASK_COMM_LEN];
5906         unsigned int *pid = v;
5907
5908         __trace_find_cmdline(*pid, buf);
5909         seq_printf(m, "%d %s\n", *pid, buf);
5910         return 0;
5911 }
5912
5913 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5914         .start          = saved_cmdlines_start,
5915         .next           = saved_cmdlines_next,
5916         .stop           = saved_cmdlines_stop,
5917         .show           = saved_cmdlines_show,
5918 };
5919
5920 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5921 {
5922         int ret;
5923
5924         ret = tracing_check_open_get_tr(NULL);
5925         if (ret)
5926                 return ret;
5927
5928         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5929 }
5930
5931 static const struct file_operations tracing_saved_cmdlines_fops = {
5932         .open           = tracing_saved_cmdlines_open,
5933         .read           = seq_read,
5934         .llseek         = seq_lseek,
5935         .release        = seq_release,
5936 };
5937
5938 static ssize_t
5939 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5940                                  size_t cnt, loff_t *ppos)
5941 {
5942         char buf[64];
5943         int r;
5944
5945         preempt_disable();
5946         arch_spin_lock(&trace_cmdline_lock);
5947         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5948         arch_spin_unlock(&trace_cmdline_lock);
5949         preempt_enable();
5950
5951         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5952 }
5953
5954 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5955 {
5956         kfree(s->saved_cmdlines);
5957         kfree(s->map_cmdline_to_pid);
5958         kfree(s);
5959 }
5960
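/*
 * Replace the saved_cmdlines buffer with a new one holding @val entries.
 * The pointer swap is done under trace_cmdline_lock so lookups never see
 * a half-initialized buffer; the old buffer is freed after the lock is
 * dropped.
 */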
5961 static int tracing_resize_saved_cmdlines(unsigned int val)
5962 {
5963         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5964
5965         s = kmalloc(sizeof(*s), GFP_KERNEL);
5966         if (!s)
5967                 return -ENOMEM;
5968
5969         if (allocate_cmdlines_buffer(val, s) < 0) {
5970                 kfree(s);
5971                 return -ENOMEM;
5972         }
5973
5974         preempt_disable();
5975         arch_spin_lock(&trace_cmdline_lock);
5976         savedcmd_temp = savedcmd;
5977         savedcmd = s;
5978         arch_spin_unlock(&trace_cmdline_lock);
5979         preempt_enable();
5980         free_saved_cmdlines_buffer(savedcmd_temp);
5981
5982         return 0;
5983 }
5984
5985 static ssize_t
5986 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5987                                   size_t cnt, loff_t *ppos)
5988 {
5989         unsigned long val;
5990         int ret;
5991
5992         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5993         if (ret)
5994                 return ret;
5995
5996         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5997         if (!val || val > PID_MAX_DEFAULT)
5998                 return -EINVAL;
5999
6000         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6001         if (ret < 0)
6002                 return ret;
6003
6004         *ppos += cnt;
6005
6006         return cnt;
6007 }
6008
6009 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6010         .open           = tracing_open_generic,
6011         .read           = tracing_saved_cmdlines_size_read,
6012         .write          = tracing_saved_cmdlines_size_write,
6013 };
6014
6015 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
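/*
 * If @ptr has landed on a non-map entry (no eval_string, i.e. the tail of
 * a chunk), follow tail.next to the next chunk and step past that chunk's
 * head entry.  Returns NULL when the end of the list is reached.
 */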
6016 static union trace_eval_map_item *
6017 update_eval_map(union trace_eval_map_item *ptr)
6018 {
6019         if (!ptr->map.eval_string) {
6020                 if (ptr->tail.next) {
6021                         ptr = ptr->tail.next;
6022                         /* Set ptr to the next real item (skip head) */
6023                         ptr++;
6024                 } else
6025                         return NULL;
6026         }
6027         return ptr;
6028 }
6029
6030 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6031 {
6032         union trace_eval_map_item *ptr = v;
6033
6034         /*
6035          * Paranoid! If ptr points to end, we don't want to increment past it.
6036          * This really should never happen.
6037          */
6038         (*pos)++;
6039         ptr = update_eval_map(ptr);
6040         if (WARN_ON_ONCE(!ptr))
6041                 return NULL;
6042
6043         ptr++;
6044         ptr = update_eval_map(ptr);
6045
6046         return ptr;
6047 }
6048
6049 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6050 {
6051         union trace_eval_map_item *v;
6052         loff_t l = 0;
6053
6054         mutex_lock(&trace_eval_mutex);
6055
6056         v = trace_eval_maps;
6057         if (v)
6058                 v++;
6059
6060         while (v && l < *pos) {
6061                 v = eval_map_next(m, v, &l);
6062         }
6063
6064         return v;
6065 }
6066
6067 static void eval_map_stop(struct seq_file *m, void *v)
6068 {
6069         mutex_unlock(&trace_eval_mutex);
6070 }
6071
6072 static int eval_map_show(struct seq_file *m, void *v)
6073 {
6074         union trace_eval_map_item *ptr = v;
6075
6076         seq_printf(m, "%s %ld (%s)\n",
6077                    ptr->map.eval_string, ptr->map.eval_value,
6078                    ptr->map.system);
6079
6080         return 0;
6081 }
6082
6083 static const struct seq_operations tracing_eval_map_seq_ops = {
6084         .start          = eval_map_start,
6085         .next           = eval_map_next,
6086         .stop           = eval_map_stop,
6087         .show           = eval_map_show,
6088 };
6089
6090 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6091 {
6092         int ret;
6093
6094         ret = tracing_check_open_get_tr(NULL);
6095         if (ret)
6096                 return ret;
6097
6098         return seq_open(filp, &tracing_eval_map_seq_ops);
6099 }
6100
6101 static const struct file_operations tracing_eval_map_fops = {
6102         .open           = tracing_eval_map_open,
6103         .read           = seq_read,
6104         .llseek         = seq_lseek,
6105         .release        = seq_release,
6106 };
6107
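/*
 * Each chunk in the trace_eval_maps list is laid out as:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * so jumping "length + 1" entries past the head lands on the tail.
 */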
6108 static inline union trace_eval_map_item *
6109 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6110 {
6111         /* Return tail of array given the head */
6112         return ptr + ptr->head.length + 1;
6113 }
6114
6115 static void
6116 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6117                            int len)
6118 {
6119         struct trace_eval_map **stop;
6120         struct trace_eval_map **map;
6121         union trace_eval_map_item *map_array;
6122         union trace_eval_map_item *ptr;
6123
6124         stop = start + len;
6125
6126         /*
6127          * The trace_eval_maps contains the map plus a head and tail item,
6128          * where the head holds the module and length of array, and the
6129          * tail holds a pointer to the next list.
6130          */
6131         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6132         if (!map_array) {
6133                 pr_warn("Unable to allocate trace eval mapping\n");
6134                 return;
6135         }
6136
6137         mutex_lock(&trace_eval_mutex);
6138
6139         if (!trace_eval_maps)
6140                 trace_eval_maps = map_array;
6141         else {
6142                 ptr = trace_eval_maps;
6143                 for (;;) {
6144                         ptr = trace_eval_jmp_to_tail(ptr);
6145                         if (!ptr->tail.next)
6146                                 break;
6147                         ptr = ptr->tail.next;
6148
6149                 }
6150                 ptr->tail.next = map_array;
6151         }
6152         map_array->head.mod = mod;
6153         map_array->head.length = len;
6154         map_array++;
6155
6156         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6157                 map_array->map = **map;
6158                 map_array++;
6159         }
6160         memset(map_array, 0, sizeof(*map_array));
6161
6162         mutex_unlock(&trace_eval_mutex);
6163 }
6164
6165 static void trace_create_eval_file(struct dentry *d_tracer)
6166 {
6167         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6168                           NULL, &tracing_eval_map_fops);
6169 }
6170
6171 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6172 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6173 static inline void trace_insert_eval_map_file(struct module *mod,
6174                               struct trace_eval_map **start, int len) { }
6175 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6176
6177 static void trace_insert_eval_map(struct module *mod,
6178                                   struct trace_eval_map **start, int len)
6179 {
6180         struct trace_eval_map **map;
6181
6182         if (len <= 0)
6183                 return;
6184
6185         map = start;
6186
6187         trace_event_eval_update(map, len);
6188
6189         trace_insert_eval_map_file(mod, start, len);
6190 }
6191
6192 static ssize_t
6193 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6194                        size_t cnt, loff_t *ppos)
6195 {
6196         struct trace_array *tr = filp->private_data;
6197         char buf[MAX_TRACER_SIZE+2];
6198         int r;
6199
6200         mutex_lock(&trace_types_lock);
6201         r = sprintf(buf, "%s\n", tr->current_trace->name);
6202         mutex_unlock(&trace_types_lock);
6203
6204         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6205 }
6206
6207 int tracer_init(struct tracer *t, struct trace_array *tr)
6208 {
6209         tracing_reset_online_cpus(&tr->array_buffer);
6210         return t->init(tr);
6211 }
6212
6213 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6214 {
6215         int cpu;
6216
6217         for_each_tracing_cpu(cpu)
6218                 per_cpu_ptr(buf->data, cpu)->entries = val;
6219 }
6220
6221 #ifdef CONFIG_TRACER_MAX_TRACE
6222 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6223 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6224                                         struct array_buffer *size_buf, int cpu_id)
6225 {
6226         int cpu, ret = 0;
6227
6228         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6229                 for_each_tracing_cpu(cpu) {
6230                         ret = ring_buffer_resize(trace_buf->buffer,
6231                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6232                         if (ret < 0)
6233                                 break;
6234                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6235                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6236                 }
6237         } else {
6238                 ret = ring_buffer_resize(trace_buf->buffer,
6239                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6240                 if (ret == 0)
6241                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6242                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6243         }
6244
6245         return ret;
6246 }
6247 #endif /* CONFIG_TRACER_MAX_TRACE */
6248
6249 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6250                                         unsigned long size, int cpu)
6251 {
6252         int ret;
6253
6254         /*
6255          * If kernel or user changes the size of the ring buffer
6256          * we use the size that was given, and we can forget about
6257          * expanding it later.
6258          */
6259         ring_buffer_expanded = true;
6260
6261         /* May be called before buffers are initialized */
6262         if (!tr->array_buffer.buffer)
6263                 return 0;
6264
6265         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6266         if (ret < 0)
6267                 return ret;
6268
6269 #ifdef CONFIG_TRACER_MAX_TRACE
6270         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6271             !tr->current_trace->use_max_tr)
6272                 goto out;
6273
6274         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6275         if (ret < 0) {
6276                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6277                                                      &tr->array_buffer, cpu);
6278                 if (r < 0) {
6279                         /*
6280                          * AARGH! We are left with a different
6281                          * sized max buffer!!!!
6282                          * The max buffer is our "snapshot" buffer.
6283                          * When a tracer needs a snapshot (one of the
6284                          * latency tracers), it swaps the max buffer
6285                          * with the saved snapshot. We succeeded in
6286                          * updating the size of the main buffer, but failed
6287                          * to update the size of the max buffer. But when we
6288                          * tried to reset the main buffer to its original
6289                          * size, we failed there too. This is very unlikely
6290                          * to happen, but if it does, warn and kill all
6291                          * tracing.
6292                          */
6293                         WARN_ON(1);
6294                         tracing_disabled = 1;
6295                 }
6296                 return ret;
6297         }
6298
6299         if (cpu == RING_BUFFER_ALL_CPUS)
6300                 set_buffer_entries(&tr->max_buffer, size);
6301         else
6302                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6303
6304  out:
6305 #endif /* CONFIG_TRACER_MAX_TRACE */
6306
6307         if (cpu == RING_BUFFER_ALL_CPUS)
6308                 set_buffer_entries(&tr->array_buffer, size);
6309         else
6310                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6311
6312         return ret;
6313 }
6314
6315 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6316                                   unsigned long size, int cpu_id)
6317 {
6318         int ret;
6319
6320         mutex_lock(&trace_types_lock);
6321
6322         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6323                 /* make sure this cpu is enabled in the mask */
6324                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6325                         ret = -EINVAL;
6326                         goto out;
6327                 }
6328         }
6329
6330         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6331         if (ret < 0)
6332                 ret = -ENOMEM;
6333
6334 out:
6335         mutex_unlock(&trace_types_lock);
6336
6337         return ret;
6338 }
6339
6340
6341 /**
6342  * tracing_update_buffers - used by tracing facility to expand ring buffers
6343  *
6344  * To save memory when tracing is never used on a system that has it
6345  * configured in, the ring buffers are initially set to a minimum size.
6346  * Once a user starts to use the tracing facility, they need to grow
6347  * to their default size.
6348  *
6349  * This function is to be called when a tracer is about to be used.
6350  */
6351 int tracing_update_buffers(void)
6352 {
6353         int ret = 0;
6354
6355         mutex_lock(&trace_types_lock);
6356         if (!ring_buffer_expanded)
6357                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6358                                                 RING_BUFFER_ALL_CPUS);
6359         mutex_unlock(&trace_types_lock);
6360
6361         return ret;
6362 }
6363
6364 struct trace_option_dentry;
6365
6366 static void
6367 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6368
6369 /*
6370  * Used to clear out the tracer before deletion of an instance.
6371  * Must have trace_types_lock held.
6372  */
6373 static void tracing_set_nop(struct trace_array *tr)
6374 {
6375         if (tr->current_trace == &nop_trace)
6376                 return;
6377
6378         tr->current_trace->enabled--;
6379
6380         if (tr->current_trace->reset)
6381                 tr->current_trace->reset(tr);
6382
6383         tr->current_trace = &nop_trace;
6384 }
6385
6386 static bool tracer_options_updated;
6387
6388 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6389 {
6390         /* Only enable if the directory has been created already. */
6391         if (!tr->dir)
6392                 return;
6393
6394         /* Only create trace option files after update_tracer_options has finished */
6395         if (!tracer_options_updated)
6396                 return;
6397
6398         create_trace_option_files(tr, t);
6399 }
6400
6401 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6402 {
6403         struct tracer *t;
6404 #ifdef CONFIG_TRACER_MAX_TRACE
6405         bool had_max_tr;
6406 #endif
6407         int ret = 0;
6408
6409         mutex_lock(&trace_types_lock);
6410
6411         if (!ring_buffer_expanded) {
6412                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6413                                                 RING_BUFFER_ALL_CPUS);
6414                 if (ret < 0)
6415                         goto out;
6416                 ret = 0;
6417         }
6418
6419         for (t = trace_types; t; t = t->next) {
6420                 if (strcmp(t->name, buf) == 0)
6421                         break;
6422         }
6423         if (!t) {
6424                 ret = -EINVAL;
6425                 goto out;
6426         }
6427         if (t == tr->current_trace)
6428                 goto out;
6429
6430 #ifdef CONFIG_TRACER_SNAPSHOT
6431         if (t->use_max_tr) {
6432                 local_irq_disable();
6433                 arch_spin_lock(&tr->max_lock);
6434                 if (tr->cond_snapshot)
6435                         ret = -EBUSY;
6436                 arch_spin_unlock(&tr->max_lock);
6437                 local_irq_enable();
6438                 if (ret)
6439                         goto out;
6440         }
6441 #endif
6442         /* Some tracers won't work on kernel command line */
6443         if (system_state < SYSTEM_RUNNING && t->noboot) {
6444                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6445                         t->name);
6446                 goto out;
6447         }
6448
6449         /* Some tracers are only allowed for the top level buffer */
6450         if (!trace_ok_for_array(t, tr)) {
6451                 ret = -EINVAL;
6452                 goto out;
6453         }
6454
6455         /* If trace pipe files are being read, we can't change the tracer */
6456         if (tr->trace_ref) {
6457                 ret = -EBUSY;
6458                 goto out;
6459         }
6460
6461         trace_branch_disable();
6462
6463         tr->current_trace->enabled--;
6464
6465         if (tr->current_trace->reset)
6466                 tr->current_trace->reset(tr);
6467
6468 #ifdef CONFIG_TRACER_MAX_TRACE
6469         had_max_tr = tr->current_trace->use_max_tr;
6470
6471         /* Current trace needs to be nop_trace before synchronize_rcu */
6472         tr->current_trace = &nop_trace;
6473
6474         if (had_max_tr && !t->use_max_tr) {
6475                 /*
6476                  * We need to make sure that the update_max_tr sees that
6477                  * current_trace changed to nop_trace to keep it from
6478                  * swapping the buffers after we resize it.
6479                  * update_max_tr() is called with interrupts disabled,
6480                  * so a synchronize_rcu() is sufficient.
6481                  */
6482                 synchronize_rcu();
6483                 free_snapshot(tr);
6484         }
6485
6486         if (t->use_max_tr && !tr->allocated_snapshot) {
6487                 ret = tracing_alloc_snapshot_instance(tr);
6488                 if (ret < 0)
6489                         goto out;
6490         }
6491 #else
6492         tr->current_trace = &nop_trace;
6493 #endif
6494
6495         if (t->init) {
6496                 ret = tracer_init(t, tr);
6497                 if (ret)
6498                         goto out;
6499         }
6500
6501         tr->current_trace = t;
6502         tr->current_trace->enabled++;
6503         trace_branch_enable(tr);
6504  out:
6505         mutex_unlock(&trace_types_lock);
6506
6507         return ret;
6508 }
6509
6510 static ssize_t
6511 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6512                         size_t cnt, loff_t *ppos)
6513 {
6514         struct trace_array *tr = filp->private_data;
6515         char buf[MAX_TRACER_SIZE+1];
6516         int i;
6517         size_t ret;
6518         int err;
6519
6520         ret = cnt;
6521
6522         if (cnt > MAX_TRACER_SIZE)
6523                 cnt = MAX_TRACER_SIZE;
6524
6525         if (copy_from_user(buf, ubuf, cnt))
6526                 return -EFAULT;
6527
6528         buf[cnt] = 0;
6529
6530         /* strip trailing whitespace. */
6531         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6532                 buf[i] = 0;
6533
6534         err = tracing_set_tracer(tr, buf);
6535         if (err)
6536                 return err;
6537
6538         *ppos += ret;
6539
6540         return ret;
6541 }
6542
6543 static ssize_t
6544 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6545                    size_t cnt, loff_t *ppos)
6546 {
6547         char buf[64];
6548         int r;
6549
6550         r = snprintf(buf, sizeof(buf), "%ld\n",
6551                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6552         if (r > sizeof(buf))
6553                 r = sizeof(buf);
6554         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 }
6556
6557 static ssize_t
6558 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6559                     size_t cnt, loff_t *ppos)
6560 {
6561         unsigned long val;
6562         int ret;
6563
6564         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6565         if (ret)
6566                 return ret;
6567
6568         *ptr = val * 1000;
6569
6570         return cnt;
6571 }
6572
6573 static ssize_t
6574 tracing_thresh_read(struct file *filp, char __user *ubuf,
6575                     size_t cnt, loff_t *ppos)
6576 {
6577         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6578 }
6579
6580 static ssize_t
6581 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6582                      size_t cnt, loff_t *ppos)
6583 {
6584         struct trace_array *tr = filp->private_data;
6585         int ret;
6586
6587         mutex_lock(&trace_types_lock);
6588         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6589         if (ret < 0)
6590                 goto out;
6591
6592         if (tr->current_trace->update_thresh) {
6593                 ret = tr->current_trace->update_thresh(tr);
6594                 if (ret < 0)
6595                         goto out;
6596         }
6597
6598         ret = cnt;
6599 out:
6600         mutex_unlock(&trace_types_lock);
6601
6602         return ret;
6603 }
6604
6605 #ifdef CONFIG_TRACER_MAX_TRACE
6606
6607 static ssize_t
6608 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6609                      size_t cnt, loff_t *ppos)
6610 {
6611         struct trace_array *tr = filp->private_data;
6612
6613         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6614 }
6615
6616 static ssize_t
6617 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6618                       size_t cnt, loff_t *ppos)
6619 {
6620         struct trace_array *tr = filp->private_data;
6621
6622         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6623 }
6624
6625 #endif
6626
6627 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6628 {
6629         if (cpu == RING_BUFFER_ALL_CPUS) {
6630                 if (cpumask_empty(tr->pipe_cpumask)) {
6631                         cpumask_setall(tr->pipe_cpumask);
6632                         return 0;
6633                 }
6634         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6635                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6636                 return 0;
6637         }
6638         return -EBUSY;
6639 }
6640
6641 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6642 {
6643         if (cpu == RING_BUFFER_ALL_CPUS) {
6644                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6645                 cpumask_clear(tr->pipe_cpumask);
6646         } else {
6647                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6648                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6649         }
6650 }
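
/*
 * Editor's note: the two helpers above make trace_pipe readers mutually
 * exclusive per CPU.  Opening the top-level pipe claims every CPU (and is
 * only allowed while no per-CPU pipe is open), opening a per-CPU pipe
 * claims just that CPU, and close_pipe_on_cpu() releases the claim again.
 */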
6651
6652 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6653 {
6654         struct trace_array *tr = inode->i_private;
6655         struct trace_iterator *iter;
6656         int cpu;
6657         int ret;
6658
6659         ret = tracing_check_open_get_tr(tr);
6660         if (ret)
6661                 return ret;
6662
6663         mutex_lock(&trace_types_lock);
6664         cpu = tracing_get_cpu(inode);
6665         ret = open_pipe_on_cpu(tr, cpu);
6666         if (ret)
6667                 goto fail_pipe_on_cpu;
6668
6669         /* create a buffer to store the information to pass to userspace */
6670         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6671         if (!iter) {
6672                 ret = -ENOMEM;
6673                 goto fail_alloc_iter;
6674         }
6675
6676         trace_seq_init(&iter->seq);
6677         iter->trace = tr->current_trace;
6678
6679         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6680                 ret = -ENOMEM;
6681                 goto fail;
6682         }
6683
6684         /* trace pipe does not show start of buffer */
6685         cpumask_setall(iter->started);
6686
6687         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6688                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6689
6690         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6691         if (trace_clocks[tr->clock_id].in_ns)
6692                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6693
6694         iter->tr = tr;
6695         iter->array_buffer = &tr->array_buffer;
6696         iter->cpu_file = cpu;
6697         mutex_init(&iter->mutex);
6698         filp->private_data = iter;
6699
6700         if (iter->trace->pipe_open)
6701                 iter->trace->pipe_open(iter);
6702
6703         nonseekable_open(inode, filp);
6704
6705         tr->trace_ref++;
6706
6707         mutex_unlock(&trace_types_lock);
6708         return ret;
6709
6710 fail:
6711         kfree(iter);
6712 fail_alloc_iter:
6713         close_pipe_on_cpu(tr, cpu);
6714 fail_pipe_on_cpu:
6715         __trace_array_put(tr);
6716         mutex_unlock(&trace_types_lock);
6717         return ret;
6718 }
6719
6720 static int tracing_release_pipe(struct inode *inode, struct file *file)
6721 {
6722         struct trace_iterator *iter = file->private_data;
6723         struct trace_array *tr = inode->i_private;
6724
6725         mutex_lock(&trace_types_lock);
6726
6727         tr->trace_ref--;
6728
6729         if (iter->trace->pipe_close)
6730                 iter->trace->pipe_close(iter);
6731         close_pipe_on_cpu(tr, iter->cpu_file);
6732         mutex_unlock(&trace_types_lock);
6733
6734         free_cpumask_var(iter->started);
6735         kfree(iter->fmt);
6736         kfree(iter->temp);
6737         mutex_destroy(&iter->mutex);
6738         kfree(iter);
6739
6740         trace_array_put(tr);
6741
6742         return 0;
6743 }
6744
6745 static __poll_t
6746 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6747 {
6748         struct trace_array *tr = iter->tr;
6749
6750         /* Iterators are static; they should be filled or empty */
6751         if (trace_buffer_iter(iter, iter->cpu_file))
6752                 return EPOLLIN | EPOLLRDNORM;
6753
6754         if (tr->trace_flags & TRACE_ITER_BLOCK)
6755                 /*
6756                  * Always select as readable when in blocking mode
6757                  */
6758                 return EPOLLIN | EPOLLRDNORM;
6759         else
6760                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6761                                              filp, poll_table, iter->tr->buffer_percent);
6762 }
6763
6764 static __poll_t
6765 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6766 {
6767         struct trace_iterator *iter = filp->private_data;
6768
6769         return trace_poll(iter, filp, poll_table);
6770 }
6771
6772 /* Must be called with iter->mutex held. */
6773 static int tracing_wait_pipe(struct file *filp)
6774 {
6775         struct trace_iterator *iter = filp->private_data;
6776         int ret;
6777
6778         while (trace_empty(iter)) {
6779
6780                 if ((filp->f_flags & O_NONBLOCK)) {
6781                         return -EAGAIN;
6782                 }
6783
6784                 /*
6785                  * We block until we have read something and tracing is
6786                  * disabled. We keep blocking while tracing is disabled if
6787                  * we have not yet read anything. This allows a user to cat
6788                  * this file and then enable tracing. But after we have read
6789                  * something, we give an EOF when tracing is disabled again.
6790                  *
6791                  * iter->pos will be 0 if we haven't read anything.
6792                  */
6793                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6794                         break;
6795
6796                 mutex_unlock(&iter->mutex);
6797
6798                 ret = wait_on_pipe(iter, 0);
6799
6800                 mutex_lock(&iter->mutex);
6801
6802                 if (ret)
6803                         return ret;
6804         }
6805
6806         return 1;
6807 }
6808
6809 /*
6810  * Consumer reader.
6811  */
6812 static ssize_t
6813 tracing_read_pipe(struct file *filp, char __user *ubuf,
6814                   size_t cnt, loff_t *ppos)
6815 {
6816         struct trace_iterator *iter = filp->private_data;
6817         ssize_t sret;
6818
6819         /*
6820          * Avoid more than one consumer on a single file descriptor.
6821          * This is just a matter of trace coherency; the ring buffer itself
6822          * is protected.
6823          */
6824         mutex_lock(&iter->mutex);
6825
6826         /* return any leftover data */
6827         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6828         if (sret != -EBUSY)
6829                 goto out;
6830
6831         trace_seq_init(&iter->seq);
6832
6833         if (iter->trace->read) {
6834                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6835                 if (sret)
6836                         goto out;
6837         }
6838
6839 waitagain:
6840         sret = tracing_wait_pipe(filp);
6841         if (sret <= 0)
6842                 goto out;
6843
6844         /* stop when tracing is finished */
6845         if (trace_empty(iter)) {
6846                 sret = 0;
6847                 goto out;
6848         }
6849
6850         if (cnt >= PAGE_SIZE)
6851                 cnt = PAGE_SIZE - 1;
6852
6853         /* reset all but tr, trace, and overruns */
6854         memset(&iter->seq, 0,
6855                sizeof(struct trace_iterator) -
6856                offsetof(struct trace_iterator, seq));
6857         cpumask_clear(iter->started);
6858         trace_seq_init(&iter->seq);
6859         iter->pos = -1;
6860
6861         trace_event_read_lock();
6862         trace_access_lock(iter->cpu_file);
6863         while (trace_find_next_entry_inc(iter) != NULL) {
6864                 enum print_line_t ret;
6865                 int save_len = iter->seq.seq.len;
6866
6867                 ret = print_trace_line(iter);
6868                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6869                         /*
6870                          * If one print_trace_line() fills the entire trace_seq in one shot,
6871                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6872                          * In this case, we need to consume it; otherwise the loop will peek
6873                          * this event again next time, resulting in an infinite loop.
6874                          */
6875                         if (save_len == 0) {
6876                                 iter->seq.full = 0;
6877                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6878                                 trace_consume(iter);
6879                                 break;
6880                         }
6881
6882                         /* In other cases, don't print partial lines */
6883                         iter->seq.seq.len = save_len;
6884                         break;
6885                 }
6886                 if (ret != TRACE_TYPE_NO_CONSUME)
6887                         trace_consume(iter);
6888
6889                 if (trace_seq_used(&iter->seq) >= cnt)
6890                         break;
6891
6892                 /*
6893                  * Setting the full flag means we reached the trace_seq buffer
6894                  * size and we should have left via the partial output condition above.
6895                  * If we get here, one of the trace_seq_* functions is not being used properly.
6896                  */
6897                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6898                           iter->ent->type);
6899         }
6900         trace_access_unlock(iter->cpu_file);
6901         trace_event_read_unlock();
6902
6903         /* Now copy what we have to the user */
6904         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6905         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6906                 trace_seq_init(&iter->seq);
6907
6908         /*
6909          * If there was nothing to send to user, in spite of consuming trace
6910          * entries, go back to wait for more entries.
6911          */
6912         if (sret == -EBUSY)
6913                 goto waitagain;
6914
6915 out:
6916         mutex_unlock(&iter->mutex);
6917
6918         return sret;
6919 }
6920
6921 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6922                                      unsigned int idx)
6923 {
6924         __free_page(spd->pages[idx]);
6925 }
6926
6927 static size_t
6928 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6929 {
6930         size_t count;
6931         int save_len;
6932         int ret;
6933
6934         /* Seq buffer is page-sized, exactly what we need. */
6935         for (;;) {
6936                 save_len = iter->seq.seq.len;
6937                 ret = print_trace_line(iter);
6938
6939                 if (trace_seq_has_overflowed(&iter->seq)) {
6940                         iter->seq.seq.len = save_len;
6941                         break;
6942                 }
6943
6944                 /*
6945                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6946                  * should only be returned if iter->seq overflowed. But check it
6947                  * anyway to be safe.
6948                  */
6949                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6950                         iter->seq.seq.len = save_len;
6951                         break;
6952                 }
6953
6954                 count = trace_seq_used(&iter->seq) - save_len;
6955                 if (rem < count) {
6956                         rem = 0;
6957                         iter->seq.seq.len = save_len;
6958                         break;
6959                 }
6960
6961                 if (ret != TRACE_TYPE_NO_CONSUME)
6962                         trace_consume(iter);
6963                 rem -= count;
6964                 if (!trace_find_next_entry_inc(iter))   {
6965                         rem = 0;
6966                         iter->ent = NULL;
6967                         break;
6968                 }
6969         }
6970
6971         return rem;
6972 }
6973
6974 static ssize_t tracing_splice_read_pipe(struct file *filp,
6975                                         loff_t *ppos,
6976                                         struct pipe_inode_info *pipe,
6977                                         size_t len,
6978                                         unsigned int flags)
6979 {
6980         struct page *pages_def[PIPE_DEF_BUFFERS];
6981         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6982         struct trace_iterator *iter = filp->private_data;
6983         struct splice_pipe_desc spd = {
6984                 .pages          = pages_def,
6985                 .partial        = partial_def,
6986                 .nr_pages       = 0, /* This gets updated below. */
6987                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6988                 .ops            = &default_pipe_buf_ops,
6989                 .spd_release    = tracing_spd_release_pipe,
6990         };
6991         ssize_t ret;
6992         size_t rem;
6993         unsigned int i;
6994
6995         if (splice_grow_spd(pipe, &spd))
6996                 return -ENOMEM;
6997
6998         mutex_lock(&iter->mutex);
6999
7000         if (iter->trace->splice_read) {
7001                 ret = iter->trace->splice_read(iter, filp,
7002                                                ppos, pipe, len, flags);
7003                 if (ret)
7004                         goto out_err;
7005         }
7006
7007         ret = tracing_wait_pipe(filp);
7008         if (ret <= 0)
7009                 goto out_err;
7010
7011         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7012                 ret = -EFAULT;
7013                 goto out_err;
7014         }
7015
7016         trace_event_read_lock();
7017         trace_access_lock(iter->cpu_file);
7018
7019         /* Fill as many pages as possible. */
7020         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7021                 spd.pages[i] = alloc_page(GFP_KERNEL);
7022                 if (!spd.pages[i])
7023                         break;
7024
7025                 rem = tracing_fill_pipe_page(rem, iter);
7026
7027                 /* Copy the data into the page, so we can start over. */
7028                 ret = trace_seq_to_buffer(&iter->seq,
7029                                           page_address(spd.pages[i]),
7030                                           trace_seq_used(&iter->seq));
7031                 if (ret < 0) {
7032                         __free_page(spd.pages[i]);
7033                         break;
7034                 }
7035                 spd.partial[i].offset = 0;
7036                 spd.partial[i].len = trace_seq_used(&iter->seq);
7037
7038                 trace_seq_init(&iter->seq);
7039         }
7040
7041         trace_access_unlock(iter->cpu_file);
7042         trace_event_read_unlock();
7043         mutex_unlock(&iter->mutex);
7044
7045         spd.nr_pages = i;
7046
7047         if (i)
7048                 ret = splice_to_pipe(pipe, &spd);
7049         else
7050                 ret = 0;
7051 out:
7052         splice_shrink_spd(&spd);
7053         return ret;
7054
7055 out_err:
7056         mutex_unlock(&iter->mutex);
7057         goto out;
7058 }
7059
7060 static ssize_t
7061 tracing_entries_read(struct file *filp, char __user *ubuf,
7062                      size_t cnt, loff_t *ppos)
7063 {
7064         struct inode *inode = file_inode(filp);
7065         struct trace_array *tr = inode->i_private;
7066         int cpu = tracing_get_cpu(inode);
7067         char buf[64];
7068         int r = 0;
7069         ssize_t ret;
7070
7071         mutex_lock(&trace_types_lock);
7072
7073         if (cpu == RING_BUFFER_ALL_CPUS) {
7074                 int cpu, buf_size_same;
7075                 unsigned long size;
7076
7077                 size = 0;
7078                 buf_size_same = 1;
7079                 /* check if all cpu sizes are same */
7080                 for_each_tracing_cpu(cpu) {
7081                         /* fill in the size from first enabled cpu */
7082                         if (size == 0)
7083                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7084                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7085                                 buf_size_same = 0;
7086                                 break;
7087                         }
7088                 }
7089
7090                 if (buf_size_same) {
7091                         if (!ring_buffer_expanded)
7092                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7093                                             size >> 10,
7094                                             trace_buf_size >> 10);
7095                         else
7096                                 r = sprintf(buf, "%lu\n", size >> 10);
7097                 } else
7098                         r = sprintf(buf, "X\n");
7099         } else
7100                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7101
7102         mutex_unlock(&trace_types_lock);
7103
7104         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7105         return ret;
7106 }
7107
7108 static ssize_t
7109 tracing_entries_write(struct file *filp, const char __user *ubuf,
7110                       size_t cnt, loff_t *ppos)
7111 {
7112         struct inode *inode = file_inode(filp);
7113         struct trace_array *tr = inode->i_private;
7114         unsigned long val;
7115         int ret;
7116
7117         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7118         if (ret)
7119                 return ret;
7120
7121         /* must have at least 1 entry */
7122         if (!val)
7123                 return -EINVAL;
7124
7125         /* value is in KB */
7126         val <<= 10;
7127         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7128         if (ret < 0)
7129                 return ret;
7130
7131         *ppos += cnt;
7132
7133         return cnt;
7134 }
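
/*
 * Editor's note on the write path above (illustrative arithmetic only):
 * the value written to buffer_size_kb is in KiB, so writing "1024" asks
 * ring_buffer_resize() for 1024 << 10 = 1048576 bytes per CPU.  Writing
 * through the top-level file resizes all CPUs (RING_BUFFER_ALL_CPUS),
 * while the per-CPU instances of this file resize only the CPU selected
 * by tracing_get_cpu().
 */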
7135
7136 static ssize_t
7137 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7138                                 size_t cnt, loff_t *ppos)
7139 {
7140         struct trace_array *tr = filp->private_data;
7141         char buf[64];
7142         int r, cpu;
7143         unsigned long size = 0, expanded_size = 0;
7144
7145         mutex_lock(&trace_types_lock);
7146         for_each_tracing_cpu(cpu) {
7147                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7148                 if (!ring_buffer_expanded)
7149                         expanded_size += trace_buf_size >> 10;
7150         }
7151         if (ring_buffer_expanded)
7152                 r = sprintf(buf, "%lu\n", size);
7153         else
7154                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7155         mutex_unlock(&trace_types_lock);
7156
7157         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7158 }
7159
7160 static ssize_t
7161 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7162                           size_t cnt, loff_t *ppos)
7163 {
7164         /*
7165          * There is no need to read what the user has written; this function
7166          * exists just to make sure that "echo" does not report an error.
7167          */
7168
7169         *ppos += cnt;
7170
7171         return cnt;
7172 }
7173
7174 static int
7175 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7176 {
7177         struct trace_array *tr = inode->i_private;
7178
7179         /* disable tracing ? */
7180         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7181                 tracer_tracing_off(tr);
7182         /* resize the ring buffer to 0 */
7183         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7184
7185         trace_array_put(tr);
7186
7187         return 0;
7188 }
7189
7190 static ssize_t
7191 tracing_mark_write(struct file *filp, const char __user *ubuf,
7192                                         size_t cnt, loff_t *fpos)
7193 {
7194         struct trace_array *tr = filp->private_data;
7195         struct ring_buffer_event *event;
7196         enum event_trigger_type tt = ETT_NONE;
7197         struct trace_buffer *buffer;
7198         struct print_entry *entry;
7199         ssize_t written;
7200         int size;
7201         int len;
7202
7203 /* Used in tracing_mark_raw_write() as well */
7204 #define FAULTED_STR "<faulted>"
7205 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7206
7207         if (tracing_disabled)
7208                 return -EINVAL;
7209
7210         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7211                 return -EINVAL;
7212
7213         if (cnt > TRACE_BUF_SIZE)
7214                 cnt = TRACE_BUF_SIZE;
7215
7216         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7217
7218         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7219
7220         /* If less than "<faulted>", then make sure we can still add that */
7221         if (cnt < FAULTED_SIZE)
7222                 size += FAULTED_SIZE - cnt;
7223
7224         buffer = tr->array_buffer.buffer;
7225         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7226                                             tracing_gen_ctx());
7227         if (unlikely(!event))
7228                 /* Ring buffer disabled, return as if not open for write */
7229                 return -EBADF;
7230
7231         entry = ring_buffer_event_data(event);
7232         entry->ip = _THIS_IP_;
7233
7234         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7235         if (len) {
7236                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7237                 cnt = FAULTED_SIZE;
7238                 written = -EFAULT;
7239         } else
7240                 written = cnt;
7241
7242         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7243                 /* do not add \n before testing triggers, but add \0 */
7244                 entry->buf[cnt] = '\0';
7245                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7246         }
7247
7248         if (entry->buf[cnt - 1] != '\n') {
7249                 entry->buf[cnt] = '\n';
7250                 entry->buf[cnt + 1] = '\0';
7251         } else
7252                 entry->buf[cnt] = '\0';
7253
7254         if (static_branch_unlikely(&trace_marker_exports_enabled))
7255                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7256         __buffer_unlock_commit(buffer, event);
7257
7258         if (tt)
7259                 event_triggers_post_call(tr->trace_marker_file, tt);
7260
7261         return written;
7262 }
7263
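/*
 * Editor's sketch of the user-space side (not part of this file; it
 * assumes tracefs is mounted at /sys/kernel/tracing): a write to the
 * trace_marker file is recorded by tracing_mark_write() above as a
 * TRACE_PRINT event.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "hello from user space\n", 22);
 *		close(fd);
 *	}
 */
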
7264 /* Limit it for now to 3K (including tag) */
7265 #define RAW_DATA_MAX_SIZE (1024*3)
7266
7267 static ssize_t
7268 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7269                                         size_t cnt, loff_t *fpos)
7270 {
7271         struct trace_array *tr = filp->private_data;
7272         struct ring_buffer_event *event;
7273         struct trace_buffer *buffer;
7274         struct raw_data_entry *entry;
7275         ssize_t written;
7276         int size;
7277         int len;
7278
7279 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7280
7281         if (tracing_disabled)
7282                 return -EINVAL;
7283
7284         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7285                 return -EINVAL;
7286
7287         /* The marker must at least have a tag id */
7288         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7289                 return -EINVAL;
7290
7291         if (cnt > TRACE_BUF_SIZE)
7292                 cnt = TRACE_BUF_SIZE;
7293
7294         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7295
7296         size = sizeof(*entry) + cnt;
7297         if (cnt < FAULT_SIZE_ID)
7298                 size += FAULT_SIZE_ID - cnt;
7299
7300         buffer = tr->array_buffer.buffer;
7301         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7302                                             tracing_gen_ctx());
7303         if (!event)
7304                 /* Ring buffer disabled, return as if not open for write */
7305                 return -EBADF;
7306
7307         entry = ring_buffer_event_data(event);
7308
7309         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7310         if (len) {
7311                 entry->id = -1;
7312                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7313                 written = -EFAULT;
7314         } else
7315                 written = cnt;
7316
7317         __buffer_unlock_commit(buffer, event);
7318
7319         return written;
7320 }
7321
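/*
 * Editor's sketch of the user-space side of the raw marker (not part of
 * this file; the corresponding tracefs file is typically trace_marker_raw).
 * The payload must start with an int tag id, followed by opaque binary
 * data, and may not exceed RAW_DATA_MAX_SIZE.
 *
 *	struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */
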
7322 static int tracing_clock_show(struct seq_file *m, void *v)
7323 {
7324         struct trace_array *tr = m->private;
7325         int i;
7326
7327         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7328                 seq_printf(m,
7329                         "%s%s%s%s", i ? " " : "",
7330                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7331                         i == tr->clock_id ? "]" : "");
7332         seq_putc(m, '\n');
7333
7334         return 0;
7335 }
7336
7337 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7338 {
7339         int i;
7340
7341         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7342                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7343                         break;
7344         }
7345         if (i == ARRAY_SIZE(trace_clocks))
7346                 return -EINVAL;
7347
7348         mutex_lock(&trace_types_lock);
7349
7350         tr->clock_id = i;
7351
7352         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7353
7354         /*
7355          * New clock may not be consistent with the previous clock.
7356          * Reset the buffer so that it doesn't have incomparable timestamps.
7357          */
7358         tracing_reset_online_cpus(&tr->array_buffer);
7359
7360 #ifdef CONFIG_TRACER_MAX_TRACE
7361         if (tr->max_buffer.buffer)
7362                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7363         tracing_reset_online_cpus(&tr->max_buffer);
7364 #endif
7365
7366         mutex_unlock(&trace_types_lock);
7367
7368         return 0;
7369 }
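
/*
 * Editor's note: tracing_set_clock() is also the programmatic entry point.
 * A caller holding a trace_array could do, illustratively:
 *
 *	ret = tracing_set_clock(tr, "global");
 *
 * where the name must match an entry in trace_clocks[], exactly as the
 * write handler below does after stripping the user-supplied string.
 */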
7370
7371 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7372                                    size_t cnt, loff_t *fpos)
7373 {
7374         struct seq_file *m = filp->private_data;
7375         struct trace_array *tr = m->private;
7376         char buf[64];
7377         const char *clockstr;
7378         int ret;
7379
7380         if (cnt >= sizeof(buf))
7381                 return -EINVAL;
7382
7383         if (copy_from_user(buf, ubuf, cnt))
7384                 return -EFAULT;
7385
7386         buf[cnt] = 0;
7387
7388         clockstr = strstrip(buf);
7389
7390         ret = tracing_set_clock(tr, clockstr);
7391         if (ret)
7392                 return ret;
7393
7394         *fpos += cnt;
7395
7396         return cnt;
7397 }
7398
7399 static int tracing_clock_open(struct inode *inode, struct file *file)
7400 {
7401         struct trace_array *tr = inode->i_private;
7402         int ret;
7403
7404         ret = tracing_check_open_get_tr(tr);
7405         if (ret)
7406                 return ret;
7407
7408         ret = single_open(file, tracing_clock_show, inode->i_private);
7409         if (ret < 0)
7410                 trace_array_put(tr);
7411
7412         return ret;
7413 }
7414
7415 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7416 {
7417         struct trace_array *tr = m->private;
7418
7419         mutex_lock(&trace_types_lock);
7420
7421         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7422                 seq_puts(m, "delta [absolute]\n");
7423         else
7424                 seq_puts(m, "[delta] absolute\n");
7425
7426         mutex_unlock(&trace_types_lock);
7427
7428         return 0;
7429 }
7430
7431 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7432 {
7433         struct trace_array *tr = inode->i_private;
7434         int ret;
7435
7436         ret = tracing_check_open_get_tr(tr);
7437         if (ret)
7438                 return ret;
7439
7440         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7441         if (ret < 0)
7442                 trace_array_put(tr);
7443
7444         return ret;
7445 }
7446
7447 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7448 {
7449         if (rbe == this_cpu_read(trace_buffered_event))
7450                 return ring_buffer_time_stamp(buffer);
7451
7452         return ring_buffer_event_time_stamp(buffer, rbe);
7453 }
7454
7455 /*
7456  * Set or disable using the per CPU trace_buffered_event when possible.
7457  */
7458 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7459 {
7460         int ret = 0;
7461
7462         mutex_lock(&trace_types_lock);
7463
7464         if (set && tr->no_filter_buffering_ref++)
7465                 goto out;
7466
7467         if (!set) {
7468                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7469                         ret = -EINVAL;
7470                         goto out;
7471                 }
7472
7473                 --tr->no_filter_buffering_ref;
7474         }
7475  out:
7476         mutex_unlock(&trace_types_lock);
7477
7478         return ret;
7479 }
7480
7481 struct ftrace_buffer_info {
7482         struct trace_iterator   iter;
7483         void                    *spare;
7484         unsigned int            spare_cpu;
7485         unsigned int            read;
7486 };
7487
7488 #ifdef CONFIG_TRACER_SNAPSHOT
7489 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7490 {
7491         struct trace_array *tr = inode->i_private;
7492         struct trace_iterator *iter;
7493         struct seq_file *m;
7494         int ret;
7495
7496         ret = tracing_check_open_get_tr(tr);
7497         if (ret)
7498                 return ret;
7499
7500         if (file->f_mode & FMODE_READ) {
7501                 iter = __tracing_open(inode, file, true);
7502                 if (IS_ERR(iter))
7503                         ret = PTR_ERR(iter);
7504         } else {
7505                 /* Writes still need the seq_file to hold the private data */
7506                 ret = -ENOMEM;
7507                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7508                 if (!m)
7509                         goto out;
7510                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7511                 if (!iter) {
7512                         kfree(m);
7513                         goto out;
7514                 }
7515                 ret = 0;
7516
7517                 iter->tr = tr;
7518                 iter->array_buffer = &tr->max_buffer;
7519                 iter->cpu_file = tracing_get_cpu(inode);
7520                 m->private = iter;
7521                 file->private_data = m;
7522         }
7523 out:
7524         if (ret < 0)
7525                 trace_array_put(tr);
7526
7527         return ret;
7528 }
7529
7530 static void tracing_swap_cpu_buffer(void *tr)
7531 {
7532         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7533 }
7534
7535 static ssize_t
7536 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7537                        loff_t *ppos)
7538 {
7539         struct seq_file *m = filp->private_data;
7540         struct trace_iterator *iter = m->private;
7541         struct trace_array *tr = iter->tr;
7542         unsigned long val;
7543         int ret;
7544
7545         ret = tracing_update_buffers();
7546         if (ret < 0)
7547                 return ret;
7548
7549         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7550         if (ret)
7551                 return ret;
7552
7553         mutex_lock(&trace_types_lock);
7554
7555         if (tr->current_trace->use_max_tr) {
7556                 ret = -EBUSY;
7557                 goto out;
7558         }
7559
7560         local_irq_disable();
7561         arch_spin_lock(&tr->max_lock);
7562         if (tr->cond_snapshot)
7563                 ret = -EBUSY;
7564         arch_spin_unlock(&tr->max_lock);
7565         local_irq_enable();
7566         if (ret)
7567                 goto out;
7568
7569         switch (val) {
7570         case 0:
7571                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7572                         ret = -EINVAL;
7573                         break;
7574                 }
7575                 if (tr->allocated_snapshot)
7576                         free_snapshot(tr);
7577                 break;
7578         case 1:
7579 /* Only allow per-cpu swap if the ring buffer supports it */
7580 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7581                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7582                         ret = -EINVAL;
7583                         break;
7584                 }
7585 #endif
7586                 if (tr->allocated_snapshot)
7587                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7588                                         &tr->array_buffer, iter->cpu_file);
7589                 else
7590                         ret = tracing_alloc_snapshot_instance(tr);
7591                 if (ret < 0)
7592                         break;
7593                 /* Now, we're going to swap */
7594                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7595                         local_irq_disable();
7596                         update_max_tr(tr, current, smp_processor_id(), NULL);
7597                         local_irq_enable();
7598                 } else {
7599                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7600                                                  (void *)tr, 1);
7601                 }
7602                 break;
7603         default:
7604                 if (tr->allocated_snapshot) {
7605                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7606                                 tracing_reset_online_cpus(&tr->max_buffer);
7607                         else
7608                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7609                 }
7610                 break;
7611         }
7612
7613         if (ret >= 0) {
7614                 *ppos += cnt;
7615                 ret = cnt;
7616         }
7617 out:
7618         mutex_unlock(&trace_types_lock);
7619         return ret;
7620 }
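
/*
 * Editor's summary of the value handling in tracing_snapshot_write()
 * above, mirroring the switch statement: writing 0 frees the snapshot
 * buffer (top-level snapshot file only), writing 1 allocates it if
 * necessary and swaps it with the live buffer (a per-CPU swap is allowed
 * only when the ring buffer supports it), and any other value simply
 * clears the snapshot buffer contents.
 */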
7621
7622 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7623 {
7624         struct seq_file *m = file->private_data;
7625         int ret;
7626
7627         ret = tracing_release(inode, file);
7628
7629         if (file->f_mode & FMODE_READ)
7630                 return ret;
7631
7632         /* If write only, the seq_file is just a stub */
7633         if (m)
7634                 kfree(m->private);
7635         kfree(m);
7636
7637         return 0;
7638 }
7639
7640 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7641 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7642                                     size_t count, loff_t *ppos);
7643 static int tracing_buffers_release(struct inode *inode, struct file *file);
7644 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7645                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7646
7647 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7648 {
7649         struct ftrace_buffer_info *info;
7650         int ret;
7651
7652         /* The following checks for tracefs lockdown */
7653         ret = tracing_buffers_open(inode, filp);
7654         if (ret < 0)
7655                 return ret;
7656
7657         info = filp->private_data;
7658
7659         if (info->iter.trace->use_max_tr) {
7660                 tracing_buffers_release(inode, filp);
7661                 return -EBUSY;
7662         }
7663
7664         info->iter.snapshot = true;
7665         info->iter.array_buffer = &info->iter.tr->max_buffer;
7666
7667         return ret;
7668 }
7669
7670 #endif /* CONFIG_TRACER_SNAPSHOT */
7671
7672
7673 static const struct file_operations tracing_thresh_fops = {
7674         .open           = tracing_open_generic,
7675         .read           = tracing_thresh_read,
7676         .write          = tracing_thresh_write,
7677         .llseek         = generic_file_llseek,
7678 };
7679
7680 #ifdef CONFIG_TRACER_MAX_TRACE
7681 static const struct file_operations tracing_max_lat_fops = {
7682         .open           = tracing_open_generic_tr,
7683         .read           = tracing_max_lat_read,
7684         .write          = tracing_max_lat_write,
7685         .llseek         = generic_file_llseek,
7686         .release        = tracing_release_generic_tr,
7687 };
7688 #endif
7689
7690 static const struct file_operations set_tracer_fops = {
7691         .open           = tracing_open_generic_tr,
7692         .read           = tracing_set_trace_read,
7693         .write          = tracing_set_trace_write,
7694         .llseek         = generic_file_llseek,
7695         .release        = tracing_release_generic_tr,
7696 };
7697
7698 static const struct file_operations tracing_pipe_fops = {
7699         .open           = tracing_open_pipe,
7700         .poll           = tracing_poll_pipe,
7701         .read           = tracing_read_pipe,
7702         .splice_read    = tracing_splice_read_pipe,
7703         .release        = tracing_release_pipe,
7704         .llseek         = no_llseek,
7705 };
7706
7707 static const struct file_operations tracing_entries_fops = {
7708         .open           = tracing_open_generic_tr,
7709         .read           = tracing_entries_read,
7710         .write          = tracing_entries_write,
7711         .llseek         = generic_file_llseek,
7712         .release        = tracing_release_generic_tr,
7713 };
7714
7715 static const struct file_operations tracing_total_entries_fops = {
7716         .open           = tracing_open_generic_tr,
7717         .read           = tracing_total_entries_read,
7718         .llseek         = generic_file_llseek,
7719         .release        = tracing_release_generic_tr,
7720 };
7721
7722 static const struct file_operations tracing_free_buffer_fops = {
7723         .open           = tracing_open_generic_tr,
7724         .write          = tracing_free_buffer_write,
7725         .release        = tracing_free_buffer_release,
7726 };
7727
7728 static const struct file_operations tracing_mark_fops = {
7729         .open           = tracing_mark_open,
7730         .write          = tracing_mark_write,
7731         .release        = tracing_release_generic_tr,
7732 };
7733
7734 static const struct file_operations tracing_mark_raw_fops = {
7735         .open           = tracing_mark_open,
7736         .write          = tracing_mark_raw_write,
7737         .release        = tracing_release_generic_tr,
7738 };
7739
7740 static const struct file_operations trace_clock_fops = {
7741         .open           = tracing_clock_open,
7742         .read           = seq_read,
7743         .llseek         = seq_lseek,
7744         .release        = tracing_single_release_tr,
7745         .write          = tracing_clock_write,
7746 };
7747
7748 static const struct file_operations trace_time_stamp_mode_fops = {
7749         .open           = tracing_time_stamp_mode_open,
7750         .read           = seq_read,
7751         .llseek         = seq_lseek,
7752         .release        = tracing_single_release_tr,
7753 };
7754
7755 #ifdef CONFIG_TRACER_SNAPSHOT
7756 static const struct file_operations snapshot_fops = {
7757         .open           = tracing_snapshot_open,
7758         .read           = seq_read,
7759         .write          = tracing_snapshot_write,
7760         .llseek         = tracing_lseek,
7761         .release        = tracing_snapshot_release,
7762 };
7763
7764 static const struct file_operations snapshot_raw_fops = {
7765         .open           = snapshot_raw_open,
7766         .read           = tracing_buffers_read,
7767         .release        = tracing_buffers_release,
7768         .splice_read    = tracing_buffers_splice_read,
7769         .llseek         = no_llseek,
7770 };
7771
7772 #endif /* CONFIG_TRACER_SNAPSHOT */
7773
7774 /*
7775  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7776  * @filp: The active open file structure
7777  * @ubuf: The user space provided buffer containing the value to write
7778  * @cnt: The number of bytes to write
7779  * @ppos: The current "file" position
7780  *
7781  * This function implements the write interface for a struct trace_min_max_param.
7782  * The filp->private_data must point to a trace_min_max_param structure that
7783  * defines where to write the value, the min and the max acceptable values,
7784  * and a lock to protect the write.
7785  */
7786 static ssize_t
7787 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7788 {
7789         struct trace_min_max_param *param = filp->private_data;
7790         u64 val;
7791         int err;
7792
7793         if (!param)
7794                 return -EFAULT;
7795
7796         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7797         if (err)
7798                 return err;
7799
7800         if (param->lock)
7801                 mutex_lock(param->lock);
7802
7803         if (param->min && val < *param->min)
7804                 err = -EINVAL;
7805
7806         if (param->max && val > *param->max)
7807                 err = -EINVAL;
7808
7809         if (!err)
7810                 *param->val = val;
7811
7812         if (param->lock)
7813                 mutex_unlock(param->lock);
7814
7815         if (err)
7816                 return err;
7817
7818         return cnt;
7819 }
7820
7821 /*
7822  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7823  * @filp: The active open file structure
7824  * @ubuf: The userspace provided buffer to read value into
7825  * @cnt: The maximum number of bytes to read
7826  * @ppos: The current "file" position
7827  *
7828  * This function implements the read interface for a struct trace_min_max_param.
7829  * The filp->private_data must point to a trace_min_max_param struct with valid
7830  * data.
7831  */
7832 static ssize_t
7833 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7834 {
7835         struct trace_min_max_param *param = filp->private_data;
7836         char buf[U64_STR_SIZE];
7837         int len;
7838         u64 val;
7839
7840         if (!param)
7841                 return -EFAULT;
7842
7843         val = *param->val;
7844
7845         if (cnt > sizeof(buf))
7846                 cnt = sizeof(buf);
7847
7848         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7849
7850         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7851 }
7852
7853 const struct file_operations trace_min_max_fops = {
7854         .open           = tracing_open_generic,
7855         .read           = trace_min_max_read,
7856         .write          = trace_min_max_write,
7857 };
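
/*
 * Illustrative sketch (not part of the original source): how a u64 knob
 * guarded by trace_min_max_fops might be wired up.  The field names match
 * struct trace_min_max_param as used by the handlers above; the
 * "example_*" identifiers, the file name and the parent dentry are
 * assumptions for illustration only.
 *
 *	static u64 example_val;
 *	static u64 example_min = 1;
 *	static u64 example_max = 1000;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_knob", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */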
7858
7859 #define TRACING_LOG_ERRS_MAX    8
7860 #define TRACING_LOG_LOC_MAX     128
7861
7862 #define CMD_PREFIX "  Command: "
7863
7864 struct err_info {
7865         const char      **errs; /* ptr to loc-specific array of err strings */
7866         u8              type;   /* index into errs -> specific err string */
7867         u8              pos;    /* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7868         u64             ts;
7869 };
7870
7871 struct tracing_log_err {
7872         struct list_head        list;
7873         struct err_info         info;
7874         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7875         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7876 };
7877
7878 static DEFINE_MUTEX(tracing_err_log_lock);
7879
7880 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7881 {
7882         struct tracing_log_err *err;
7883
7884         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7885                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7886                 if (!err)
7887                         err = ERR_PTR(-ENOMEM);
7888                 else
7889                         tr->n_err_log_entries++;
7890
7891                 return err;
7892         }
7893
7894         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7895         list_del(&err->list);
7896
7897         return err;
7898 }
7899
7900 /**
7901  * err_pos - find the position of a string within a command for error careting
7902  * @cmd: The tracing command that caused the error
7903  * @str: The string to position the caret at within @cmd
7904  *
7905  * Finds the position of the first occurrence of @str within @cmd.  The
7906  * return value can be passed to tracing_log_err() for caret placement
7907  * within @cmd.
7908  *
7909  * Returns the index within @cmd of the first occurrence of @str or 0
7910  * if @str was not found.
7911  */
7912 unsigned int err_pos(char *cmd, const char *str)
7913 {
7914         char *found;
7915
7916         if (WARN_ON(!strlen(cmd)))
7917                 return 0;
7918
7919         found = strstr(cmd, str);
7920         if (found)
7921                 return found - cmd;
7922
7923         return 0;
7924 }
7925
7926 /**
7927  * tracing_log_err - write an error to the tracing error log
7928  * @tr: The associated trace array for the error (NULL for top level array)
7929  * @loc: A string describing where the error occurred
7930  * @cmd: The tracing command that caused the error
7931  * @errs: The array of loc-specific static error strings
7932  * @type: The index into errs[], which produces the specific static err string
7933  * @pos: The position the caret should be placed in the cmd
7934  *
7935  * Writes an error into tracing/error_log of the form:
7936  *
7937  * <loc>: error: <text>
7938  *   Command: <cmd>
7939  *              ^
7940  *
7941  * tracing/error_log is a small log file containing the last
7942  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7943  * unless there has been a tracing error, and the error log can be
7944  * cleared and have its memory freed by writing the empty string to it
7945  * in truncation mode, i.e. echo > tracing/error_log.
7946  *
7947  * NOTE: the @errs array along with the @type param are used to
7948  * produce a static error string - this string is not copied and saved
7949  * when the error is logged - only a pointer to it is saved.  See
7950  * existing callers for examples of how static strings are typically
7951  * defined for use with tracing_log_err().
7952  */
7953 void tracing_log_err(struct trace_array *tr,
7954                      const char *loc, const char *cmd,
7955                      const char **errs, u8 type, u8 pos)
7956 {
7957         struct tracing_log_err *err;
7958
7959         if (!tr)
7960                 tr = &global_trace;
7961
7962         mutex_lock(&tracing_err_log_lock);
7963         err = get_tracing_log_err(tr);
7964         if (PTR_ERR(err) == -ENOMEM) {
7965                 mutex_unlock(&tracing_err_log_lock);
7966                 return;
7967         }
7968
7969         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7970         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7971
7972         err->info.errs = errs;
7973         err->info.type = type;
7974         err->info.pos = pos;
7975         err->info.ts = local_clock();
7976
7977         list_add_tail(&err->list, &tr->err_log);
7978         mutex_unlock(&tracing_err_log_lock);
7979 }
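
/*
 * Illustrative sketch (not from the original source): how a caller might
 * define its loc-specific error strings and report a parse error.  The
 * "example_*" names, the error text and the "keyword" token are
 * assumptions for illustration only; see existing callers for the real
 * patterns.
 *
 *	static const char *example_errs[] = {
 *		"Unknown keyword in command",
 *	};
 *
 *	tracing_log_err(tr, "example_loc", cmd, example_errs,
 *			0, err_pos(cmd, "keyword"));
 */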
7980
7981 static void clear_tracing_err_log(struct trace_array *tr)
7982 {
7983         struct tracing_log_err *err, *next;
7984
7985         mutex_lock(&tracing_err_log_lock);
7986         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7987                 list_del(&err->list);
7988                 kfree(err);
7989         }
7990
7991         tr->n_err_log_entries = 0;
7992         mutex_unlock(&tracing_err_log_lock);
7993 }
7994
7995 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7996 {
7997         struct trace_array *tr = m->private;
7998
7999         mutex_lock(&tracing_err_log_lock);
8000
8001         return seq_list_start(&tr->err_log, *pos);
8002 }
8003
8004 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8005 {
8006         struct trace_array *tr = m->private;
8007
8008         return seq_list_next(v, &tr->err_log, pos);
8009 }
8010
8011 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8012 {
8013         mutex_unlock(&tracing_err_log_lock);
8014 }
8015
8016 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
8017 {
8018         u8 i;
8019
8020         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8021                 seq_putc(m, ' ');
8022         for (i = 0; i < pos; i++)
8023                 seq_putc(m, ' ');
8024         seq_puts(m, "^\n");
8025 }
8026
8027 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8028 {
8029         struct tracing_log_err *err = v;
8030
8031         if (err) {
8032                 const char *err_text = err->info.errs[err->info.type];
8033                 u64 sec = err->info.ts;
8034                 u32 nsec;
8035
8036                 nsec = do_div(sec, NSEC_PER_SEC);
8037                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8038                            err->loc, err_text);
8039                 seq_printf(m, "%s", err->cmd);
8040                 tracing_err_log_show_pos(m, err->info.pos);
8041         }
8042
8043         return 0;
8044 }
8045
8046 static const struct seq_operations tracing_err_log_seq_ops = {
8047         .start  = tracing_err_log_seq_start,
8048         .next   = tracing_err_log_seq_next,
8049         .stop   = tracing_err_log_seq_stop,
8050         .show   = tracing_err_log_seq_show
8051 };
8052
8053 static int tracing_err_log_open(struct inode *inode, struct file *file)
8054 {
8055         struct trace_array *tr = inode->i_private;
8056         int ret = 0;
8057
8058         ret = tracing_check_open_get_tr(tr);
8059         if (ret)
8060                 return ret;
8061
8062         /* If this file was opened for write, then erase contents */
8063         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8064                 clear_tracing_err_log(tr);
8065
8066         if (file->f_mode & FMODE_READ) {
8067                 ret = seq_open(file, &tracing_err_log_seq_ops);
8068                 if (!ret) {
8069                         struct seq_file *m = file->private_data;
8070                         m->private = tr;
8071                 } else {
8072                         trace_array_put(tr);
8073                 }
8074         }
8075         return ret;
8076 }
8077
8078 static ssize_t tracing_err_log_write(struct file *file,
8079                                      const char __user *buffer,
8080                                      size_t count, loff_t *ppos)
8081 {
8082         return count;
8083 }
8084
8085 static int tracing_err_log_release(struct inode *inode, struct file *file)
8086 {
8087         struct trace_array *tr = inode->i_private;
8088
8089         trace_array_put(tr);
8090
8091         if (file->f_mode & FMODE_READ)
8092                 seq_release(inode, file);
8093
8094         return 0;
8095 }
8096
8097 static const struct file_operations tracing_err_log_fops = {
8098         .open           = tracing_err_log_open,
8099         .write          = tracing_err_log_write,
8100         .read           = seq_read,
8101         .llseek         = tracing_lseek,
8102         .release        = tracing_err_log_release,
8103 };
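
/*
 * Illustrative sketch (not part of the original source): reading and
 * clearing the error log from user space, assuming tracefs is mounted
 * at /sys/kernel/tracing.
 *
 *	# show the logged errors, each with a caret under the offending
 *	# part of the command:
 *	cat /sys/kernel/tracing/error_log
 *
 *	# clear the log and free its memory (opens with O_TRUNC):
 *	echo > /sys/kernel/tracing/error_log
 */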
8104
8105 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8106 {
8107         struct trace_array *tr = inode->i_private;
8108         struct ftrace_buffer_info *info;
8109         int ret;
8110
8111         ret = tracing_check_open_get_tr(tr);
8112         if (ret)
8113                 return ret;
8114
8115         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8116         if (!info) {
8117                 trace_array_put(tr);
8118                 return -ENOMEM;
8119         }
8120
8121         mutex_lock(&trace_types_lock);
8122
8123         info->iter.tr           = tr;
8124         info->iter.cpu_file     = tracing_get_cpu(inode);
8125         info->iter.trace        = tr->current_trace;
8126         info->iter.array_buffer = &tr->array_buffer;
8127         info->spare             = NULL;
8128         /* Force reading ring buffer for first read */
8129         info->read              = (unsigned int)-1;
8130
8131         filp->private_data = info;
8132
8133         tr->trace_ref++;
8134
8135         mutex_unlock(&trace_types_lock);
8136
8137         ret = nonseekable_open(inode, filp);
8138         if (ret < 0)
8139                 trace_array_put(tr);
8140
8141         return ret;
8142 }
8143
8144 static __poll_t
8145 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8146 {
8147         struct ftrace_buffer_info *info = filp->private_data;
8148         struct trace_iterator *iter = &info->iter;
8149
8150         return trace_poll(iter, filp, poll_table);
8151 }
8152
8153 static ssize_t
8154 tracing_buffers_read(struct file *filp, char __user *ubuf,
8155                      size_t count, loff_t *ppos)
8156 {
8157         struct ftrace_buffer_info *info = filp->private_data;
8158         struct trace_iterator *iter = &info->iter;
8159         ssize_t ret = 0;
8160         ssize_t size;
8161
8162         if (!count)
8163                 return 0;
8164
8165 #ifdef CONFIG_TRACER_MAX_TRACE
8166         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8167                 return -EBUSY;
8168 #endif
8169
8170         if (!info->spare) {
8171                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8172                                                           iter->cpu_file);
8173                 if (IS_ERR(info->spare)) {
8174                         ret = PTR_ERR(info->spare);
8175                         info->spare = NULL;
8176                 } else {
8177                         info->spare_cpu = iter->cpu_file;
8178                 }
8179         }
8180         if (!info->spare)
8181                 return ret;
8182
8183         /* Do we have previous read data to read? */
8184         if (info->read < PAGE_SIZE)
8185                 goto read;
8186
8187  again:
8188         trace_access_lock(iter->cpu_file);
8189         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8190                                     &info->spare,
8191                                     count,
8192                                     iter->cpu_file, 0);
8193         trace_access_unlock(iter->cpu_file);
8194
8195         if (ret < 0) {
8196                 if (trace_empty(iter)) {
8197                         if ((filp->f_flags & O_NONBLOCK))
8198                                 return -EAGAIN;
8199
8200                         ret = wait_on_pipe(iter, 0);
8201                         if (ret)
8202                                 return ret;
8203
8204                         goto again;
8205                 }
8206                 return 0;
8207         }
8208
8209         info->read = 0;
8210  read:
8211         size = PAGE_SIZE - info->read;
8212         if (size > count)
8213                 size = count;
8214
8215         ret = copy_to_user(ubuf, info->spare + info->read, size);
8216         if (ret == size)
8217                 return -EFAULT;
8218
8219         size -= ret;
8220
8221         *ppos += size;
8222         info->read += size;
8223
8224         return size;
8225 }
8226
8227 static int tracing_buffers_release(struct inode *inode, struct file *file)
8228 {
8229         struct ftrace_buffer_info *info = file->private_data;
8230         struct trace_iterator *iter = &info->iter;
8231
8232         mutex_lock(&trace_types_lock);
8233
8234         iter->tr->trace_ref--;
8235
8236         __trace_array_put(iter->tr);
8237
8238         iter->wait_index++;
8239         /* Make sure the waiters see the new wait_index */
8240         smp_wmb();
8241
8242         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8243
8244         if (info->spare)
8245                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8246                                            info->spare_cpu, info->spare);
8247         kvfree(info);
8248
8249         mutex_unlock(&trace_types_lock);
8250
8251         return 0;
8252 }
8253
8254 struct buffer_ref {
8255         struct trace_buffer     *buffer;
8256         void                    *page;
8257         int                     cpu;
8258         refcount_t              refcount;
8259 };
8260
8261 static void buffer_ref_release(struct buffer_ref *ref)
8262 {
8263         if (!refcount_dec_and_test(&ref->refcount))
8264                 return;
8265         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8266         kfree(ref);
8267 }
8268
8269 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8270                                     struct pipe_buffer *buf)
8271 {
8272         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8273
8274         buffer_ref_release(ref);
8275         buf->private = 0;
8276 }
8277
8278 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8279                                 struct pipe_buffer *buf)
8280 {
8281         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8282
8283         if (refcount_read(&ref->refcount) > INT_MAX/2)
8284                 return false;
8285
8286         refcount_inc(&ref->refcount);
8287         return true;
8288 }
8289
8290 /* Pipe buffer operations for a buffer. */
8291 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8292         .release                = buffer_pipe_buf_release,
8293         .get                    = buffer_pipe_buf_get,
8294 };
8295
8296 /*
8297  * Callback from splice_to_pipe(), if we need to release some pages
8298  * at the end of the spd in case we errored out while filling the pipe.
8299  */
8300 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8301 {
8302         struct buffer_ref *ref =
8303                 (struct buffer_ref *)spd->partial[i].private;
8304
8305         buffer_ref_release(ref);
8306         spd->partial[i].private = 0;
8307 }
8308
8309 static ssize_t
8310 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8311                             struct pipe_inode_info *pipe, size_t len,
8312                             unsigned int flags)
8313 {
8314         struct ftrace_buffer_info *info = file->private_data;
8315         struct trace_iterator *iter = &info->iter;
8316         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8317         struct page *pages_def[PIPE_DEF_BUFFERS];
8318         struct splice_pipe_desc spd = {
8319                 .pages          = pages_def,
8320                 .partial        = partial_def,
8321                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8322                 .ops            = &buffer_pipe_buf_ops,
8323                 .spd_release    = buffer_spd_release,
8324         };
8325         struct buffer_ref *ref;
8326         int entries, i;
8327         ssize_t ret = 0;
8328
8329 #ifdef CONFIG_TRACER_MAX_TRACE
8330         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8331                 return -EBUSY;
8332 #endif
8333
8334         if (*ppos & (PAGE_SIZE - 1))
8335                 return -EINVAL;
8336
8337         if (len & (PAGE_SIZE - 1)) {
8338                 if (len < PAGE_SIZE)
8339                         return -EINVAL;
8340                 len &= PAGE_MASK;
8341         }
8342
8343         if (splice_grow_spd(pipe, &spd))
8344                 return -ENOMEM;
8345
8346  again:
8347         trace_access_lock(iter->cpu_file);
8348         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8349
8350         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8351                 struct page *page;
8352                 int r;
8353
8354                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8355                 if (!ref) {
8356                         ret = -ENOMEM;
8357                         break;
8358                 }
8359
8360                 refcount_set(&ref->refcount, 1);
8361                 ref->buffer = iter->array_buffer->buffer;
8362                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8363                 if (IS_ERR(ref->page)) {
8364                         ret = PTR_ERR(ref->page);
8365                         ref->page = NULL;
8366                         kfree(ref);
8367                         break;
8368                 }
8369                 ref->cpu = iter->cpu_file;
8370
8371                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8372                                           len, iter->cpu_file, 1);
8373                 if (r < 0) {
8374                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8375                                                    ref->page);
8376                         kfree(ref);
8377                         break;
8378                 }
8379
8380                 page = virt_to_page(ref->page);
8381
8382                 spd.pages[i] = page;
8383                 spd.partial[i].len = PAGE_SIZE;
8384                 spd.partial[i].offset = 0;
8385                 spd.partial[i].private = (unsigned long)ref;
8386                 spd.nr_pages++;
8387                 *ppos += PAGE_SIZE;
8388
8389                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8390         }
8391
8392         trace_access_unlock(iter->cpu_file);
8393         spd.nr_pages = i;
8394
8395         /* did we read anything? */
8396         if (!spd.nr_pages) {
8397                 long wait_index;
8398
8399                 if (ret)
8400                         goto out;
8401
8402                 ret = -EAGAIN;
8403                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8404                         goto out;
8405
8406                 wait_index = READ_ONCE(iter->wait_index);
8407
8408                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8409                 if (ret)
8410                         goto out;
8411
8412                 /* No need to wait after waking up when tracing is off */
8413                 if (!tracer_tracing_is_on(iter->tr))
8414                         goto out;
8415
8416                 /* Make sure we see the new wait_index */
8417                 smp_rmb();
8418                 if (wait_index != iter->wait_index)
8419                         goto out;
8420
8421                 goto again;
8422         }
8423
8424         ret = splice_to_pipe(pipe, &spd);
8425 out:
8426         splice_shrink_spd(&spd);
8427
8428         return ret;
8429 }
8430
8431 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8432 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8433 {
8434         struct ftrace_buffer_info *info = file->private_data;
8435         struct trace_iterator *iter = &info->iter;
8436
8437         if (cmd)
8438                 return -ENOIOCTLCMD;
8439
8440         mutex_lock(&trace_types_lock);
8441
8442         iter->wait_index++;
8443         /* Make sure the waiters see the new wait_index */
8444         smp_wmb();
8445
8446         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8447
8448         mutex_unlock(&trace_types_lock);
8449         return 0;
8450 }
8451
8452 static const struct file_operations tracing_buffers_fops = {
8453         .open           = tracing_buffers_open,
8454         .read           = tracing_buffers_read,
8455         .poll           = tracing_buffers_poll,
8456         .release        = tracing_buffers_release,
8457         .splice_read    = tracing_buffers_splice_read,
8458         .unlocked_ioctl = tracing_buffers_ioctl,
8459         .llseek         = no_llseek,
8460 };
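
/*
 * Illustrative sketch (not part of the original source): a reader of
 * trace_pipe_raw blocked in read() or splice() can be woken from another
 * thread with the "wake up waiters" ioctl (cmd 0) handled above.  The
 * path assumes tracefs is mounted at /sys/kernel/tracing.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	... one thread blocks in read(fd, ...) or splice() ...
 *
 *	ioctl(fd, 0);	// from another thread: wake the blocked reader
 */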
8461
8462 static ssize_t
8463 tracing_stats_read(struct file *filp, char __user *ubuf,
8464                    size_t count, loff_t *ppos)
8465 {
8466         struct inode *inode = file_inode(filp);
8467         struct trace_array *tr = inode->i_private;
8468         struct array_buffer *trace_buf = &tr->array_buffer;
8469         int cpu = tracing_get_cpu(inode);
8470         struct trace_seq *s;
8471         unsigned long cnt;
8472         unsigned long long t;
8473         unsigned long usec_rem;
8474
8475         s = kmalloc(sizeof(*s), GFP_KERNEL);
8476         if (!s)
8477                 return -ENOMEM;
8478
8479         trace_seq_init(s);
8480
8481         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8482         trace_seq_printf(s, "entries: %ld\n", cnt);
8483
8484         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8485         trace_seq_printf(s, "overrun: %ld\n", cnt);
8486
8487         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8488         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8489
8490         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8491         trace_seq_printf(s, "bytes: %ld\n", cnt);
8492
8493         if (trace_clocks[tr->clock_id].in_ns) {
8494                 /* local or global for trace_clock */
8495                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8496                 usec_rem = do_div(t, USEC_PER_SEC);
8497                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8498                                                                 t, usec_rem);
8499
8500                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8501                 usec_rem = do_div(t, USEC_PER_SEC);
8502                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8503         } else {
8504                 /* counter or tsc mode for trace_clock */
8505                 trace_seq_printf(s, "oldest event ts: %llu\n",
8506                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8507
8508                 trace_seq_printf(s, "now ts: %llu\n",
8509                                 ring_buffer_time_stamp(trace_buf->buffer));
8510         }
8511
8512         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8513         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8514
8515         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8516         trace_seq_printf(s, "read events: %ld\n", cnt);
8517
8518         count = simple_read_from_buffer(ubuf, count, ppos,
8519                                         s->buffer, trace_seq_used(s));
8520
8521         kfree(s);
8522
8523         return count;
8524 }
8525
8526 static const struct file_operations tracing_stats_fops = {
8527         .open           = tracing_open_generic_tr,
8528         .read           = tracing_stats_read,
8529         .llseek         = generic_file_llseek,
8530         .release        = tracing_release_generic_tr,
8531 };
8532
8533 #ifdef CONFIG_DYNAMIC_FTRACE
8534
8535 static ssize_t
8536 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8537                   size_t cnt, loff_t *ppos)
8538 {
8539         ssize_t ret;
8540         char *buf;
8541         int r;
8542
8543         /* 256 should be plenty to hold the amount needed */
8544         buf = kmalloc(256, GFP_KERNEL);
8545         if (!buf)
8546                 return -ENOMEM;
8547
8548         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8549                       ftrace_update_tot_cnt,
8550                       ftrace_number_of_pages,
8551                       ftrace_number_of_groups);
8552
8553         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8554         kfree(buf);
8555         return ret;
8556 }
8557
8558 static const struct file_operations tracing_dyn_info_fops = {
8559         .open           = tracing_open_generic,
8560         .read           = tracing_read_dyn_info,
8561         .llseek         = generic_file_llseek,
8562 };
8563 #endif /* CONFIG_DYNAMIC_FTRACE */
8564
8565 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8566 static void
8567 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8568                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8569                 void *data)
8570 {
8571         tracing_snapshot_instance(tr);
8572 }
8573
8574 static void
8575 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8576                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8577                       void *data)
8578 {
8579         struct ftrace_func_mapper *mapper = data;
8580         long *count = NULL;
8581
8582         if (mapper)
8583                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8584
8585         if (count) {
8586
8587                 if (*count <= 0)
8588                         return;
8589
8590                 (*count)--;
8591         }
8592
8593         tracing_snapshot_instance(tr);
8594 }
8595
8596 static int
8597 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8598                       struct ftrace_probe_ops *ops, void *data)
8599 {
8600         struct ftrace_func_mapper *mapper = data;
8601         long *count = NULL;
8602
8603         seq_printf(m, "%ps:", (void *)ip);
8604
8605         seq_puts(m, "snapshot");
8606
8607         if (mapper)
8608                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8609
8610         if (count)
8611                 seq_printf(m, ":count=%ld\n", *count);
8612         else
8613                 seq_puts(m, ":unlimited\n");
8614
8615         return 0;
8616 }
8617
8618 static int
8619 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8620                      unsigned long ip, void *init_data, void **data)
8621 {
8622         struct ftrace_func_mapper *mapper = *data;
8623
8624         if (!mapper) {
8625                 mapper = allocate_ftrace_func_mapper();
8626                 if (!mapper)
8627                         return -ENOMEM;
8628                 *data = mapper;
8629         }
8630
8631         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8632 }
8633
8634 static void
8635 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8636                      unsigned long ip, void *data)
8637 {
8638         struct ftrace_func_mapper *mapper = data;
8639
8640         if (!ip) {
8641                 if (!mapper)
8642                         return;
8643                 free_ftrace_func_mapper(mapper, NULL);
8644                 return;
8645         }
8646
8647         ftrace_func_mapper_remove_ip(mapper, ip);
8648 }
8649
8650 static struct ftrace_probe_ops snapshot_probe_ops = {
8651         .func                   = ftrace_snapshot,
8652         .print                  = ftrace_snapshot_print,
8653 };
8654
8655 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8656         .func                   = ftrace_count_snapshot,
8657         .print                  = ftrace_snapshot_print,
8658         .init                   = ftrace_snapshot_init,
8659         .free                   = ftrace_snapshot_free,
8660 };
8661
8662 static int
8663 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8664                                char *glob, char *cmd, char *param, int enable)
8665 {
8666         struct ftrace_probe_ops *ops;
8667         void *count = (void *)-1;
8668         char *number;
8669         int ret;
8670
8671         if (!tr)
8672                 return -ENODEV;
8673
8674         /* hash funcs only work with set_ftrace_filter */
8675         if (!enable)
8676                 return -EINVAL;
8677
8678         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8679
8680         if (glob[0] == '!')
8681                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8682
8683         if (!param)
8684                 goto out_reg;
8685
8686         number = strsep(&param, ":");
8687
8688         if (!strlen(number))
8689                 goto out_reg;
8690
8691         /*
8692          * We use the callback data field (which is a pointer)
8693          * as our counter.
8694          */
8695         ret = kstrtoul(number, 0, (unsigned long *)&count);
8696         if (ret)
8697                 return ret;
8698
8699  out_reg:
8700         ret = tracing_alloc_snapshot_instance(tr);
8701         if (ret < 0)
8702                 goto out;
8703
8704         ret = register_ftrace_function_probe(glob, tr, ops, count);
8705
8706  out:
8707         return ret < 0 ? ret : 0;
8708 }
8709
8710 static struct ftrace_func_command ftrace_snapshot_cmd = {
8711         .name                   = "snapshot",
8712         .func                   = ftrace_trace_snapshot_callback,
8713 };
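
/*
 * Illustrative sketch (not part of the original source): the "snapshot"
 * command registered below is used through set_ftrace_filter and parsed
 * by ftrace_trace_snapshot_callback() above.  The function name and
 * count are assumptions; the path assumes tracefs is mounted at
 * /sys/kernel/tracing.
 *
 *	# take one snapshot the next time schedule() is traced:
 *	echo 'schedule:snapshot:1' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# remove the probe again:
 *	echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */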
8714
8715 static __init int register_snapshot_cmd(void)
8716 {
8717         return register_ftrace_command(&ftrace_snapshot_cmd);
8718 }
8719 #else
8720 static inline __init int register_snapshot_cmd(void) { return 0; }
8721 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8722
8723 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8724 {
8725         if (WARN_ON(!tr->dir))
8726                 return ERR_PTR(-ENODEV);
8727
8728         /* Top directory uses NULL as the parent */
8729         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8730                 return NULL;
8731
8732         /* All sub buffers have a descriptor */
8733         return tr->dir;
8734 }
8735
8736 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8737 {
8738         struct dentry *d_tracer;
8739
8740         if (tr->percpu_dir)
8741                 return tr->percpu_dir;
8742
8743         d_tracer = tracing_get_dentry(tr);
8744         if (IS_ERR(d_tracer))
8745                 return NULL;
8746
8747         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8748
8749         MEM_FAIL(!tr->percpu_dir,
8750                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8751
8752         return tr->percpu_dir;
8753 }
8754
8755 static struct dentry *
8756 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8757                       void *data, long cpu, const struct file_operations *fops)
8758 {
8759         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8760
8761         if (ret) /* See tracing_get_cpu() */
8762                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8763         return ret;
8764 }
8765
8766 static void
8767 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8768 {
8769         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8770         struct dentry *d_cpu;
8771         char cpu_dir[30]; /* 30 characters should be more than enough */
8772
8773         if (!d_percpu)
8774                 return;
8775
8776         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8777         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8778         if (!d_cpu) {
8779                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8780                 return;
8781         }
8782
8783         /* per cpu trace_pipe */
8784         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8785                                 tr, cpu, &tracing_pipe_fops);
8786
8787         /* per cpu trace */
8788         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8789                                 tr, cpu, &tracing_fops);
8790
8791         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8792                                 tr, cpu, &tracing_buffers_fops);
8793
8794         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8795                                 tr, cpu, &tracing_stats_fops);
8796
8797         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8798                                 tr, cpu, &tracing_entries_fops);
8799
8800 #ifdef CONFIG_TRACER_SNAPSHOT
8801         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8802                                 tr, cpu, &snapshot_fops);
8803
8804         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8805                                 tr, cpu, &snapshot_raw_fops);
8806 #endif
8807 }
8808
8809 #ifdef CONFIG_FTRACE_SELFTEST
8810 /* Let selftest have access to static functions in this file */
8811 #include "trace_selftest.c"
8812 #endif
8813
8814 static ssize_t
8815 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8816                         loff_t *ppos)
8817 {
8818         struct trace_option_dentry *topt = filp->private_data;
8819         char *buf;
8820
8821         if (topt->flags->val & topt->opt->bit)
8822                 buf = "1\n";
8823         else
8824                 buf = "0\n";
8825
8826         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8827 }
8828
8829 static ssize_t
8830 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8831                          loff_t *ppos)
8832 {
8833         struct trace_option_dentry *topt = filp->private_data;
8834         unsigned long val;
8835         int ret;
8836
8837         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8838         if (ret)
8839                 return ret;
8840
8841         if (val != 0 && val != 1)
8842                 return -EINVAL;
8843
8844         if (!!(topt->flags->val & topt->opt->bit) != val) {
8845                 mutex_lock(&trace_types_lock);
8846                 ret = __set_tracer_option(topt->tr, topt->flags,
8847                                           topt->opt, !val);
8848                 mutex_unlock(&trace_types_lock);
8849                 if (ret)
8850                         return ret;
8851         }
8852
8853         *ppos += cnt;
8854
8855         return cnt;
8856 }
8857
8858 static int tracing_open_options(struct inode *inode, struct file *filp)
8859 {
8860         struct trace_option_dentry *topt = inode->i_private;
8861         int ret;
8862
8863         ret = tracing_check_open_get_tr(topt->tr);
8864         if (ret)
8865                 return ret;
8866
8867         filp->private_data = inode->i_private;
8868         return 0;
8869 }
8870
8871 static int tracing_release_options(struct inode *inode, struct file *file)
8872 {
8873         struct trace_option_dentry *topt = file->private_data;
8874
8875         trace_array_put(topt->tr);
8876         return 0;
8877 }
8878
8879 static const struct file_operations trace_options_fops = {
8880         .open = tracing_open_options,
8881         .read = trace_options_read,
8882         .write = trace_options_write,
8883         .llseek = generic_file_llseek,
8884         .release = tracing_release_options,
8885 };
8886
8887 /*
8888  * In order to pass in both the trace_array descriptor as well as the index
8889  * to the flag that the trace option file represents, the trace_array
8890  * has a character array of trace_flags_index[], which holds the index
8891  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8892  * The address of the element for a given flag is passed to that flag's
8893  * option file read/write callbacks.
8894  *
8895  * In order to extract both the index and the trace_array descriptor,
8896  * get_tr_index() uses the following algorithm.
8897  *
8898  *   idx = *ptr;
8899  *
8900  * The pointer itself is the address of an entry in the index array, and
8901  * that entry's value is its own position (remember index[1] == 1).
8902  *
8903  * Then, to get the trace_array descriptor, subtracting that index from
8904  * the pointer gets us to the start of the index array:
8905  *
8906  *   ptr - idx == &index[0]
8907  *
8908  * Then a simple container_of() from that pointer gets us to the
8909  * trace_array descriptor.
8910  */
8911 static void get_tr_index(void *data, struct trace_array **ptr,
8912                          unsigned int *pindex)
8913 {
8914         *pindex = *(unsigned char *)data;
8915
8916         *ptr = container_of(data - *pindex, struct trace_array,
8917                             trace_flags_index);
8918 }
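
/*
 * Worked example (illustrative, not part of the original source): the
 * option file for flag bit 3 is created with
 * data == &tr->trace_flags_index[3], so *data == 3,
 * data - 3 == tr->trace_flags_index, and container_of() on that address
 * recovers the enclosing trace_array.
 */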
8919
8920 static ssize_t
8921 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8922                         loff_t *ppos)
8923 {
8924         void *tr_index = filp->private_data;
8925         struct trace_array *tr;
8926         unsigned int index;
8927         char *buf;
8928
8929         get_tr_index(tr_index, &tr, &index);
8930
8931         if (tr->trace_flags & (1 << index))
8932                 buf = "1\n";
8933         else
8934                 buf = "0\n";
8935
8936         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8937 }
8938
8939 static ssize_t
8940 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8941                          loff_t *ppos)
8942 {
8943         void *tr_index = filp->private_data;
8944         struct trace_array *tr;
8945         unsigned int index;
8946         unsigned long val;
8947         int ret;
8948
8949         get_tr_index(tr_index, &tr, &index);
8950
8951         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8952         if (ret)
8953                 return ret;
8954
8955         if (val != 0 && val != 1)
8956                 return -EINVAL;
8957
8958         mutex_lock(&event_mutex);
8959         mutex_lock(&trace_types_lock);
8960         ret = set_tracer_flag(tr, 1 << index, val);
8961         mutex_unlock(&trace_types_lock);
8962         mutex_unlock(&event_mutex);
8963
8964         if (ret < 0)
8965                 return ret;
8966
8967         *ppos += cnt;
8968
8969         return cnt;
8970 }
8971
8972 static const struct file_operations trace_options_core_fops = {
8973         .open = tracing_open_generic,
8974         .read = trace_options_core_read,
8975         .write = trace_options_core_write,
8976         .llseek = generic_file_llseek,
8977 };
8978
8979 struct dentry *trace_create_file(const char *name,
8980                                  umode_t mode,
8981                                  struct dentry *parent,
8982                                  void *data,
8983                                  const struct file_operations *fops)
8984 {
8985         struct dentry *ret;
8986
8987         ret = tracefs_create_file(name, mode, parent, data, fops);
8988         if (!ret)
8989                 pr_warn("Could not create tracefs '%s' entry\n", name);
8990
8991         return ret;
8992 }
8993
8994
8995 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8996 {
8997         struct dentry *d_tracer;
8998
8999         if (tr->options)
9000                 return tr->options;
9001
9002         d_tracer = tracing_get_dentry(tr);
9003         if (IS_ERR(d_tracer))
9004                 return NULL;
9005
9006         tr->options = tracefs_create_dir("options", d_tracer);
9007         if (!tr->options) {
9008                 pr_warn("Could not create tracefs directory 'options'\n");
9009                 return NULL;
9010         }
9011
9012         return tr->options;
9013 }
9014
9015 static void
9016 create_trace_option_file(struct trace_array *tr,
9017                          struct trace_option_dentry *topt,
9018                          struct tracer_flags *flags,
9019                          struct tracer_opt *opt)
9020 {
9021         struct dentry *t_options;
9022
9023         t_options = trace_options_init_dentry(tr);
9024         if (!t_options)
9025                 return;
9026
9027         topt->flags = flags;
9028         topt->opt = opt;
9029         topt->tr = tr;
9030
9031         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9032                                         t_options, topt, &trace_options_fops);
9033
9034 }
9035
9036 static void
9037 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9038 {
9039         struct trace_option_dentry *topts;
9040         struct trace_options *tr_topts;
9041         struct tracer_flags *flags;
9042         struct tracer_opt *opts;
9043         int cnt;
9044         int i;
9045
9046         if (!tracer)
9047                 return;
9048
9049         flags = tracer->flags;
9050
9051         if (!flags || !flags->opts)
9052                 return;
9053
9054         /*
9055          * If this is an instance, only create flags for tracers
9056          * the instance may have.
9057          */
9058         if (!trace_ok_for_array(tracer, tr))
9059                 return;
9060
9061         for (i = 0; i < tr->nr_topts; i++) {
9062                 /* Make sure there are no duplicate flags. */
9063                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9064                         return;
9065         }
9066
9067         opts = flags->opts;
9068
9069         for (cnt = 0; opts[cnt].name; cnt++)
9070                 ;
9071
9072         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9073         if (!topts)
9074                 return;
9075
9076         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9077                             GFP_KERNEL);
9078         if (!tr_topts) {
9079                 kfree(topts);
9080                 return;
9081         }
9082
9083         tr->topts = tr_topts;
9084         tr->topts[tr->nr_topts].tracer = tracer;
9085         tr->topts[tr->nr_topts].topts = topts;
9086         tr->nr_topts++;
9087
9088         for (cnt = 0; opts[cnt].name; cnt++) {
9089                 create_trace_option_file(tr, &topts[cnt], flags,
9090                                          &opts[cnt]);
9091                 MEM_FAIL(topts[cnt].entry == NULL,
9092                           "Failed to create trace option: %s",
9093                           opts[cnt].name);
9094         }
9095 }
9096
9097 static struct dentry *
9098 create_trace_option_core_file(struct trace_array *tr,
9099                               const char *option, long index)
9100 {
9101         struct dentry *t_options;
9102
9103         t_options = trace_options_init_dentry(tr);
9104         if (!t_options)
9105                 return NULL;
9106
9107         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9108                                  (void *)&tr->trace_flags_index[index],
9109                                  &trace_options_core_fops);
9110 }
9111
9112 static void create_trace_options_dir(struct trace_array *tr)
9113 {
9114         struct dentry *t_options;
9115         bool top_level = tr == &global_trace;
9116         int i;
9117
9118         t_options = trace_options_init_dentry(tr);
9119         if (!t_options)
9120                 return;
9121
9122         for (i = 0; trace_options[i]; i++) {
9123                 if (top_level ||
9124                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9125                         create_trace_option_core_file(tr, trace_options[i], i);
9126         }
9127 }
9128
9129 static ssize_t
9130 rb_simple_read(struct file *filp, char __user *ubuf,
9131                size_t cnt, loff_t *ppos)
9132 {
9133         struct trace_array *tr = filp->private_data;
9134         char buf[64];
9135         int r;
9136
9137         r = tracer_tracing_is_on(tr);
9138         r = sprintf(buf, "%d\n", r);
9139
9140         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9141 }
9142
9143 static ssize_t
9144 rb_simple_write(struct file *filp, const char __user *ubuf,
9145                 size_t cnt, loff_t *ppos)
9146 {
9147         struct trace_array *tr = filp->private_data;
9148         struct trace_buffer *buffer = tr->array_buffer.buffer;
9149         unsigned long val;
9150         int ret;
9151
9152         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9153         if (ret)
9154                 return ret;
9155
9156         if (buffer) {
9157                 mutex_lock(&trace_types_lock);
9158                 if (!!val == tracer_tracing_is_on(tr)) {
9159                         val = 0; /* do nothing */
9160                 } else if (val) {
9161                         tracer_tracing_on(tr);
9162                         if (tr->current_trace->start)
9163                                 tr->current_trace->start(tr);
9164                 } else {
9165                         tracer_tracing_off(tr);
9166                         if (tr->current_trace->stop)
9167                                 tr->current_trace->stop(tr);
9168                         /* Wake up any waiters */
9169                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9170                 }
9171                 mutex_unlock(&trace_types_lock);
9172         }
9173
9174         (*ppos)++;
9175
9176         return cnt;
9177 }
9178
9179 static const struct file_operations rb_simple_fops = {
9180         .open           = tracing_open_generic_tr,
9181         .read           = rb_simple_read,
9182         .write          = rb_simple_write,
9183         .release        = tracing_release_generic_tr,
9184         .llseek         = default_llseek,
9185 };
9186
9187 static ssize_t
9188 buffer_percent_read(struct file *filp, char __user *ubuf,
9189                     size_t cnt, loff_t *ppos)
9190 {
9191         struct trace_array *tr = filp->private_data;
9192         char buf[64];
9193         int r;
9194
9195         r = tr->buffer_percent;
9196         r = sprintf(buf, "%d\n", r);
9197
9198         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9199 }
9200
9201 static ssize_t
9202 buffer_percent_write(struct file *filp, const char __user *ubuf,
9203                      size_t cnt, loff_t *ppos)
9204 {
9205         struct trace_array *tr = filp->private_data;
9206         unsigned long val;
9207         int ret;
9208
9209         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9210         if (ret)
9211                 return ret;
9212
9213         if (val > 100)
9214                 return -EINVAL;
9215
9216         tr->buffer_percent = val;
9217
9218         (*ppos)++;
9219
9220         return cnt;
9221 }
9222
9223 static const struct file_operations buffer_percent_fops = {
9224         .open           = tracing_open_generic_tr,
9225         .read           = buffer_percent_read,
9226         .write          = buffer_percent_write,
9227         .release        = tracing_release_generic_tr,
9228         .llseek         = default_llseek,
9229 };
9230
9231 static struct dentry *trace_instance_dir;
9232
9233 static void
9234 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9235
9236 static int
9237 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9238 {
9239         enum ring_buffer_flags rb_flags;
9240
9241         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9242
9243         buf->tr = tr;
9244
9245         buf->buffer = ring_buffer_alloc(size, rb_flags);
9246         if (!buf->buffer)
9247                 return -ENOMEM;
9248
9249         buf->data = alloc_percpu(struct trace_array_cpu);
9250         if (!buf->data) {
9251                 ring_buffer_free(buf->buffer);
9252                 buf->buffer = NULL;
9253                 return -ENOMEM;
9254         }
9255
9256         /* Allocate the first page for all buffers */
9257         set_buffer_entries(&tr->array_buffer,
9258                            ring_buffer_size(tr->array_buffer.buffer, 0));
9259
9260         return 0;
9261 }
9262
9263 static int allocate_trace_buffers(struct trace_array *tr, int size)
9264 {
9265         int ret;
9266
9267         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9268         if (ret)
9269                 return ret;
9270
9271 #ifdef CONFIG_TRACER_MAX_TRACE
9272         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9273                                     allocate_snapshot ? size : 1);
9274         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9275                 ring_buffer_free(tr->array_buffer.buffer);
9276                 tr->array_buffer.buffer = NULL;
9277                 free_percpu(tr->array_buffer.data);
9278                 tr->array_buffer.data = NULL;
9279                 return -ENOMEM;
9280         }
9281         tr->allocated_snapshot = allocate_snapshot;
9282
9283         /*
9284          * Only the top level trace array gets its snapshot allocated
9285          * from the kernel command line.
9286          */
9287         allocate_snapshot = false;
9288 #endif
9289
9290         return 0;
9291 }
9292
9293 static void free_trace_buffer(struct array_buffer *buf)
9294 {
9295         if (buf->buffer) {
9296                 ring_buffer_free(buf->buffer);
9297                 buf->buffer = NULL;
9298                 free_percpu(buf->data);
9299                 buf->data = NULL;
9300         }
9301 }
9302
9303 static void free_trace_buffers(struct trace_array *tr)
9304 {
9305         if (!tr)
9306                 return;
9307
9308         free_trace_buffer(&tr->array_buffer);
9309
9310 #ifdef CONFIG_TRACER_MAX_TRACE
9311         free_trace_buffer(&tr->max_buffer);
9312 #endif
9313 }
9314
9315 static void init_trace_flags_index(struct trace_array *tr)
9316 {
9317         int i;
9318
9319         /* Used by the trace options files */
9320         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9321                 tr->trace_flags_index[i] = i;
9322 }
9323
9324 static void __update_tracer_options(struct trace_array *tr)
9325 {
9326         struct tracer *t;
9327
9328         for (t = trace_types; t; t = t->next)
9329                 add_tracer_options(tr, t);
9330 }
9331
9332 static void update_tracer_options(struct trace_array *tr)
9333 {
9334         mutex_lock(&trace_types_lock);
9335         tracer_options_updated = true;
9336         __update_tracer_options(tr);
9337         mutex_unlock(&trace_types_lock);
9338 }
9339
9340 /* Must have trace_types_lock held */
9341 struct trace_array *trace_array_find(const char *instance)
9342 {
9343         struct trace_array *tr, *found = NULL;
9344
9345         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9346                 if (tr->name && strcmp(tr->name, instance) == 0) {
9347                         found = tr;
9348                         break;
9349                 }
9350         }
9351
9352         return found;
9353 }
9354
9355 struct trace_array *trace_array_find_get(const char *instance)
9356 {
9357         struct trace_array *tr;
9358
9359         mutex_lock(&trace_types_lock);
9360         tr = trace_array_find(instance);
9361         if (tr)
9362                 tr->ref++;
9363         mutex_unlock(&trace_types_lock);
9364
9365         return tr;
9366 }
9367
9368 static int trace_array_create_dir(struct trace_array *tr)
9369 {
9370         int ret;
9371
9372         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9373         if (!tr->dir)
9374                 return -EINVAL;
9375
9376         ret = event_trace_add_tracer(tr->dir, tr);
9377         if (ret) {
9378                 tracefs_remove(tr->dir);
9379                 return ret;
9380         }
9381
9382         init_tracer_tracefs(tr, tr->dir);
9383         __update_tracer_options(tr);
9384
9385         return ret;
9386 }
9387
9388 static struct trace_array *trace_array_create(const char *name)
9389 {
9390         struct trace_array *tr;
9391         int ret;
9392
9393         ret = -ENOMEM;
9394         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9395         if (!tr)
9396                 return ERR_PTR(ret);
9397
9398         tr->name = kstrdup(name, GFP_KERNEL);
9399         if (!tr->name)
9400                 goto out_free_tr;
9401
9402         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9403                 goto out_free_tr;
9404
9405         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9406                 goto out_free_tr;
9407
9408         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9409
9410         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9411
9412         raw_spin_lock_init(&tr->start_lock);
9413
9414         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9415
9416         tr->current_trace = &nop_trace;
9417
9418         INIT_LIST_HEAD(&tr->systems);
9419         INIT_LIST_HEAD(&tr->events);
9420         INIT_LIST_HEAD(&tr->hist_vars);
9421         INIT_LIST_HEAD(&tr->err_log);
9422
9423         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9424                 goto out_free_tr;
9425
9426         if (ftrace_allocate_ftrace_ops(tr) < 0)
9427                 goto out_free_tr;
9428
9429         ftrace_init_trace_array(tr);
9430
9431         init_trace_flags_index(tr);
9432
9433         if (trace_instance_dir) {
9434                 ret = trace_array_create_dir(tr);
9435                 if (ret)
9436                         goto out_free_tr;
9437         } else
9438                 __trace_early_add_events(tr);
9439
9440         list_add(&tr->list, &ftrace_trace_arrays);
9441
9442         tr->ref++;
9443
9444         return tr;
9445
9446  out_free_tr:
9447         ftrace_free_ftrace_ops(tr);
9448         free_trace_buffers(tr);
9449         free_cpumask_var(tr->pipe_cpumask);
9450         free_cpumask_var(tr->tracing_cpumask);
9451         kfree(tr->name);
9452         kfree(tr);
9453
9454         return ERR_PTR(ret);
9455 }
9456
9457 static int instance_mkdir(const char *name)
9458 {
9459         struct trace_array *tr;
9460         int ret;
9461
9462         mutex_lock(&event_mutex);
9463         mutex_lock(&trace_types_lock);
9464
9465         ret = -EEXIST;
9466         if (trace_array_find(name))
9467                 goto out_unlock;
9468
9469         tr = trace_array_create(name);
9470
9471         ret = PTR_ERR_OR_ZERO(tr);
9472
9473 out_unlock:
9474         mutex_unlock(&trace_types_lock);
9475         mutex_unlock(&event_mutex);
9476         return ret;
9477 }
9478
9479 /**
9480  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9481  * @name: The name of the trace array to be looked up/created.
9482  *
9483  * Returns a pointer to the trace array with the given name, or
9484  * NULL if it cannot be created.
9485  *
9486  * NOTE: This function increments the reference counter associated with the
9487  * trace array returned. This makes sure it cannot be freed while in use.
9488  * Use trace_array_put() once the trace array is no longer needed.
9489  * If the trace_array is to be freed, trace_array_destroy() needs to
9490  * be called after the trace_array_put(), or simply let user space delete
9491  * it from the tracefs instances directory. But until the
9492  * trace_array_put() is called, user space cannot delete it.
9493  *
9494  */
9495 struct trace_array *trace_array_get_by_name(const char *name)
9496 {
9497         struct trace_array *tr;
9498
9499         mutex_lock(&event_mutex);
9500         mutex_lock(&trace_types_lock);
9501
9502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9503                 if (tr->name && strcmp(tr->name, name) == 0)
9504                         goto out_unlock;
9505         }
9506
9507         tr = trace_array_create(name);
9508
9509         if (IS_ERR(tr))
9510                 tr = NULL;
9511 out_unlock:
9512         if (tr)
9513                 tr->ref++;
9514
9515         mutex_unlock(&trace_types_lock);
9516         mutex_unlock(&event_mutex);
9517         return tr;
9518 }
9519 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
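/*
 * Illustrative usage sketch (editorial example, not part of the kernel
 * sources): a module that wants a private ring buffer would pair the
 * calls above roughly as follows.  The instance name "my_instance" and
 * the error handling are made up for illustration only.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use the instance ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should go away)
 */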
9520
9521 static int __remove_instance(struct trace_array *tr)
9522 {
9523         int i;
9524
9525         /* Reference counter for a newly created trace array = 1. */
9526         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9527                 return -EBUSY;
9528
9529         list_del(&tr->list);
9530
9531         /* Disable all the flags that were enabled coming in */
9532         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9533                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9534                         set_tracer_flag(tr, 1 << i, 0);
9535         }
9536
9537         tracing_set_nop(tr);
9538         clear_ftrace_function_probes(tr);
9539         event_trace_del_tracer(tr);
9540         ftrace_clear_pids(tr);
9541         ftrace_destroy_function_files(tr);
9542         tracefs_remove(tr->dir);
9543         free_percpu(tr->last_func_repeats);
9544         free_trace_buffers(tr);
9545         clear_tracing_err_log(tr);
9546
9547         for (i = 0; i < tr->nr_topts; i++) {
9548                 kfree(tr->topts[i].topts);
9549         }
9550         kfree(tr->topts);
9551
9552         free_cpumask_var(tr->pipe_cpumask);
9553         free_cpumask_var(tr->tracing_cpumask);
9554         kfree(tr->name);
9555         kfree(tr);
9556
9557         return 0;
9558 }
9559
9560 int trace_array_destroy(struct trace_array *this_tr)
9561 {
9562         struct trace_array *tr;
9563         int ret;
9564
9565         if (!this_tr)
9566                 return -EINVAL;
9567
9568         mutex_lock(&event_mutex);
9569         mutex_lock(&trace_types_lock);
9570
9571         ret = -ENODEV;
9572
9573         /* Make sure the trace array exists before destroying it. */
9574         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9575                 if (tr == this_tr) {
9576                         ret = __remove_instance(tr);
9577                         break;
9578                 }
9579         }
9580
9581         mutex_unlock(&trace_types_lock);
9582         mutex_unlock(&event_mutex);
9583
9584         return ret;
9585 }
9586 EXPORT_SYMBOL_GPL(trace_array_destroy);
9587
9588 static int instance_rmdir(const char *name)
9589 {
9590         struct trace_array *tr;
9591         int ret;
9592
9593         mutex_lock(&event_mutex);
9594         mutex_lock(&trace_types_lock);
9595
9596         ret = -ENODEV;
9597         tr = trace_array_find(name);
9598         if (tr)
9599                 ret = __remove_instance(tr);
9600
9601         mutex_unlock(&trace_types_lock);
9602         mutex_unlock(&event_mutex);
9603
9604         return ret;
9605 }
9606
9607 static __init void create_trace_instances(struct dentry *d_tracer)
9608 {
9609         struct trace_array *tr;
9610
9611         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9612                                                          instance_mkdir,
9613                                                          instance_rmdir);
9614         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9615                 return;
9616
9617         mutex_lock(&event_mutex);
9618         mutex_lock(&trace_types_lock);
9619
9620         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9621                 if (!tr->name)
9622                         continue;
9623                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9624                              "Failed to create instance directory\n"))
9625                         break;
9626         }
9627
9628         mutex_unlock(&trace_types_lock);
9629         mutex_unlock(&event_mutex);
9630 }
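/*
 * Editorial note: the instance_mkdir()/instance_rmdir() callbacks wired
 * up above are what user space triggers when it creates or removes a
 * tracing instance, e.g. (assuming tracefs is mounted at the usual
 * /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 */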
9631
9632 static void
9633 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9634 {
9635         struct trace_event_file *file;
9636         int cpu;
9637
9638         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9639                         tr, &show_traces_fops);
9640
9641         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9642                         tr, &set_tracer_fops);
9643
9644         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9645                           tr, &tracing_cpumask_fops);
9646
9647         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9648                           tr, &tracing_iter_fops);
9649
9650         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9651                           tr, &tracing_fops);
9652
9653         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9654                           tr, &tracing_pipe_fops);
9655
9656         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9657                           tr, &tracing_entries_fops);
9658
9659         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9660                           tr, &tracing_total_entries_fops);
9661
9662         trace_create_file("free_buffer", 0200, d_tracer,
9663                           tr, &tracing_free_buffer_fops);
9664
9665         trace_create_file("trace_marker", 0220, d_tracer,
9666                           tr, &tracing_mark_fops);
9667
9668         file = __find_event_file(tr, "ftrace", "print");
9669         if (file && file->dir)
9670                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9671                                   file, &event_trigger_fops);
9672         tr->trace_marker_file = file;
9673
9674         trace_create_file("trace_marker_raw", 0220, d_tracer,
9675                           tr, &tracing_mark_raw_fops);
9676
9677         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9678                           &trace_clock_fops);
9679
9680         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9681                           tr, &rb_simple_fops);
9682
9683         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9684                           &trace_time_stamp_mode_fops);
9685
9686         tr->buffer_percent = 50;
9687
9688         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9689                         tr, &buffer_percent_fops);
9690
9691         create_trace_options_dir(tr);
9692
9693 #ifdef CONFIG_TRACER_MAX_TRACE
9694         trace_create_maxlat_file(tr, d_tracer);
9695 #endif
9696
9697         if (ftrace_create_function_files(tr, d_tracer))
9698                 MEM_FAIL(1, "Could not allocate function filter files");
9699
9700 #ifdef CONFIG_TRACER_SNAPSHOT
9701         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9702                           tr, &snapshot_fops);
9703 #endif
9704
9705         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9706                           tr, &tracing_err_log_fops);
9707
9708         for_each_tracing_cpu(cpu)
9709                 tracing_init_tracefs_percpu(tr, cpu);
9710
9711         ftrace_init_tracefs(tr, d_tracer);
9712 }
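/*
 * Editorial note: init_tracer_tracefs() populates both the top level
 * tracing directory (called from tracer_init_tracefs() below) and each
 * instance directory (called from trace_array_create_dir() above), so a
 * freshly created instance is expected to expose, among others:
 *
 *	/sys/kernel/tracing/instances/foo/trace
 *	/sys/kernel/tracing/instances/foo/trace_pipe
 *	/sys/kernel/tracing/instances/foo/tracing_on
 *	/sys/kernel/tracing/instances/foo/buffer_size_kb
 *
 * (paths assume the default tracefs mount point).
 */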
9713
9714 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9715 {
9716         struct vfsmount *mnt;
9717         struct file_system_type *type;
9718
9719         /*
9720          * To maintain backward compatibility for tools that mount
9721          * debugfs to get to the tracing facility, tracefs is automatically
9722          * mounted to the debugfs/tracing directory.
9723          */
9724         type = get_fs_type("tracefs");
9725         if (!type)
9726                 return NULL;
9727         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9728         put_filesystem(type);
9729         if (IS_ERR(mnt))
9730                 return NULL;
9731         mntget(mnt);
9732
9733         return mnt;
9734 }
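/*
 * Editorial note: the automount above is why both of the following paths
 * reach the same tracefs files, assuming debugfs and tracefs are mounted
 * in their conventional locations:
 *
 *	/sys/kernel/debug/tracing/	(legacy path, via this automount)
 *	/sys/kernel/tracing/		(native tracefs mount)
 */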
9735
9736 /**
9737  * tracing_init_dentry - initialize top level trace array
9738  *
9739  * This is called when creating files or directories in the tracing
9740  * directory. It is called via fs_initcall() by the boot-up code and
9741  * returns zero on success, or a negative error code on failure.
9742  */
9743 int tracing_init_dentry(void)
9744 {
9745         struct trace_array *tr = &global_trace;
9746
9747         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9748                 pr_warn("Tracing disabled due to lockdown\n");
9749                 return -EPERM;
9750         }
9751
9752         /* The top level trace array uses NULL as parent */
9753         if (tr->dir)
9754                 return 0;
9755
9756         if (WARN_ON(!tracefs_initialized()))
9757                 return -ENODEV;
9758
9759         /*
9760          * As there may still be users that expect the tracing
9761          * files to exist in debugfs/tracing, we must automount
9762          * the tracefs file system there, so older tools still
9763          * work with the newer kernel.
9764          */
9765         tr->dir = debugfs_create_automount("tracing", NULL,
9766                                            trace_automount, NULL);
9767
9768         return 0;
9769 }
9770
9771 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9772 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9773
9774 static struct workqueue_struct *eval_map_wq __initdata;
9775 static struct work_struct eval_map_work __initdata;
9776
9777 static void __init eval_map_work_func(struct work_struct *work)
9778 {
9779         int len;
9780
9781         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9782         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9783 }
9784
9785 static int __init trace_eval_init(void)
9786 {
9787         INIT_WORK(&eval_map_work, eval_map_work_func);
9788
9789         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9790         if (!eval_map_wq) {
9791                 pr_err("Unable to allocate eval_map_wq\n");
9792                 /* Fall back to doing the work synchronously */
9793                 eval_map_work_func(&eval_map_work);
9794                 return -ENOMEM;
9795         }
9796
9797         queue_work(eval_map_wq, &eval_map_work);
9798         return 0;
9799 }
9800
9801 static int __init trace_eval_sync(void)
9802 {
9803         /* Make sure the eval map updates are finished */
9804         if (eval_map_wq)
9805                 destroy_workqueue(eval_map_wq);
9806         return 0;
9807 }
9808
9809 late_initcall_sync(trace_eval_sync);
9810
9811
9812 #ifdef CONFIG_MODULES
9813 static void trace_module_add_evals(struct module *mod)
9814 {
9815         if (!mod->num_trace_evals)
9816                 return;
9817
9818         /*
9819          * Modules with bad taint do not have events created, do
9820          * not bother with enums either.
9821          */
9822         if (trace_module_has_bad_taint(mod))
9823                 return;
9824
9825         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9826 }
9827
9828 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9829 static void trace_module_remove_evals(struct module *mod)
9830 {
9831         union trace_eval_map_item *map;
9832         union trace_eval_map_item **last = &trace_eval_maps;
9833
9834         if (!mod->num_trace_evals)
9835                 return;
9836
9837         mutex_lock(&trace_eval_mutex);
9838
9839         map = trace_eval_maps;
9840
9841         while (map) {
9842                 if (map->head.mod == mod)
9843                         break;
9844                 map = trace_eval_jmp_to_tail(map);
9845                 last = &map->tail.next;
9846                 map = map->tail.next;
9847         }
9848         if (!map)
9849                 goto out;
9850
9851         *last = trace_eval_jmp_to_tail(map)->tail.next;
9852         kfree(map);
9853  out:
9854         mutex_unlock(&trace_eval_mutex);
9855 }
9856 #else
9857 static inline void trace_module_remove_evals(struct module *mod) { }
9858 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9859
9860 static int trace_module_notify(struct notifier_block *self,
9861                                unsigned long val, void *data)
9862 {
9863         struct module *mod = data;
9864
9865         switch (val) {
9866         case MODULE_STATE_COMING:
9867                 trace_module_add_evals(mod);
9868                 break;
9869         case MODULE_STATE_GOING:
9870                 trace_module_remove_evals(mod);
9871                 break;
9872         }
9873
9874         return NOTIFY_OK;
9875 }
9876
9877 static struct notifier_block trace_module_nb = {
9878         .notifier_call = trace_module_notify,
9879         .priority = 0,
9880 };
9881 #endif /* CONFIG_MODULES */
9882
9883 static __init int tracer_init_tracefs(void)
9884 {
9885         int ret;
9886
9887         trace_access_lock_init();
9888
9889         ret = tracing_init_dentry();
9890         if (ret)
9891                 return 0;
9892
9893         event_trace_init();
9894
9895         init_tracer_tracefs(&global_trace, NULL);
9896         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9897
9898         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9899                         &global_trace, &tracing_thresh_fops);
9900
9901         trace_create_file("README", TRACE_MODE_READ, NULL,
9902                         NULL, &tracing_readme_fops);
9903
9904         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9905                         NULL, &tracing_saved_cmdlines_fops);
9906
9907         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9908                           NULL, &tracing_saved_cmdlines_size_fops);
9909
9910         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9911                         NULL, &tracing_saved_tgids_fops);
9912
9913         trace_eval_init();
9914
9915         trace_create_eval_file(NULL);
9916
9917 #ifdef CONFIG_MODULES
9918         register_module_notifier(&trace_module_nb);
9919 #endif
9920
9921 #ifdef CONFIG_DYNAMIC_FTRACE
9922         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9923                         NULL, &tracing_dyn_info_fops);
9924 #endif
9925
9926         create_trace_instances(NULL);
9927
9928         update_tracer_options(&global_trace);
9929
9930         return 0;
9931 }
9932
9933 fs_initcall(tracer_init_tracefs);
9934
9935 static int trace_panic_handler(struct notifier_block *this,
9936                                unsigned long event, void *unused)
9937 {
9938         if (ftrace_dump_on_oops)
9939                 ftrace_dump(ftrace_dump_on_oops);
9940         return NOTIFY_OK;
9941 }
9942
9943 static struct notifier_block trace_panic_notifier = {
9944         .notifier_call  = trace_panic_handler,
9945         .next           = NULL,
9946         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9947 };
9948
9949 static int trace_die_handler(struct notifier_block *self,
9950                              unsigned long val,
9951                              void *data)
9952 {
9953         switch (val) {
9954         case DIE_OOPS:
9955                 if (ftrace_dump_on_oops)
9956                         ftrace_dump(ftrace_dump_on_oops);
9957                 break;
9958         default:
9959                 break;
9960         }
9961         return NOTIFY_OK;
9962 }
9963
9964 static struct notifier_block trace_die_notifier = {
9965         .notifier_call = trace_die_handler,
9966         .priority = 200
9967 };
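/*
 * Editorial note: both notifiers above only dump the buffers when
 * ftrace_dump_on_oops is non-zero.  That is typically requested on the
 * kernel command line, for example:
 *
 *	ftrace_dump_on_oops		(dump every CPU's buffer)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that oopsed)
 *
 * or at run time via the kernel.ftrace_dump_on_oops sysctl.
 */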
9968
9969 /*
9970  * printk is capped at 1024 bytes; we really don't need it that big.
9971  * Nothing should be printing 1000 characters anyway.
9972  */
9973 #define TRACE_MAX_PRINT         1000
9974
9975 /*
9976  * Define here KERN_TRACE so that we have one place to modify
9977  * it if we decide to change what log level the ftrace dump
9978  * should be at.
9979  */
9980 #define KERN_TRACE              KERN_EMERG
9981
9982 void
9983 trace_printk_seq(struct trace_seq *s)
9984 {
9985         /* Probably should print a warning here. */
9986         if (s->seq.len >= TRACE_MAX_PRINT)
9987                 s->seq.len = TRACE_MAX_PRINT;
9988
9989         /*
9990          * More paranoid code. Although the buffer size is set to
9991          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9992          * an extra layer of protection.
9993          */
9994         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9995                 s->seq.len = s->seq.size - 1;
9996
9997         /* Should already be NUL-terminated, but we are paranoid. */
9998         s->buffer[s->seq.len] = 0;
9999
10000         printk(KERN_TRACE "%s", s->buffer);
10001
10002         trace_seq_init(s);
10003 }
10004
10005 void trace_init_global_iter(struct trace_iterator *iter)
10006 {
10007         iter->tr = &global_trace;
10008         iter->trace = iter->tr->current_trace;
10009         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10010         iter->array_buffer = &global_trace.array_buffer;
10011
10012         if (iter->trace && iter->trace->open)
10013                 iter->trace->open(iter);
10014
10015         /* Annotate start of buffers if we had overruns */
10016         if (ring_buffer_overruns(iter->array_buffer->buffer))
10017                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10018
10019         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10020         if (trace_clocks[iter->tr->clock_id].in_ns)
10021                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10022
10023         /* Can not use kmalloc for iter.temp and iter.fmt */
10024         iter->temp = static_temp_buf;
10025         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10026         iter->fmt = static_fmt_buf;
10027         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10028 }
10029
10030 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10031 {
10032         /* use static because iter can be a bit big for the stack */
10033         static struct trace_iterator iter;
10034         static atomic_t dump_running;
10035         struct trace_array *tr = &global_trace;
10036         unsigned int old_userobj;
10037         unsigned long flags;
10038         int cnt = 0, cpu;
10039
10040         /* Only allow one dump user at a time. */
10041         if (atomic_inc_return(&dump_running) != 1) {
10042                 atomic_dec(&dump_running);
10043                 return;
10044         }
10045
10046         /*
10047          * Always turn off tracing when we dump.
10048          * We don't need to show trace output of what happens
10049          * between multiple crashes.
10050          *
10051          * If the user does a sysrq-z, then they can re-enable
10052          * tracing with echo 1 > tracing_on.
10053          */
10054         tracing_off();
10055
10056         local_irq_save(flags);
10057
10058         /* Simulate the iterator */
10059         trace_init_global_iter(&iter);
10060
10061         for_each_tracing_cpu(cpu) {
10062                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10063         }
10064
10065         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10066
10067         /* don't look at user memory in panic mode */
10068         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10069
10070         switch (oops_dump_mode) {
10071         case DUMP_ALL:
10072                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10073                 break;
10074         case DUMP_ORIG:
10075                 iter.cpu_file = raw_smp_processor_id();
10076                 break;
10077         case DUMP_NONE:
10078                 goto out_enable;
10079         default:
10080                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10081                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10082         }
10083
10084         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10085
10086         /* Did function tracer already get disabled? */
10087         if (ftrace_is_dead()) {
10088                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10089                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10090         }
10091
10092         /*
10093          * We need to stop all tracing on all CPUs to read
10094          * the next buffer. This is a bit expensive, but is
10095          * not done often. We print all that we can read,
10096          * and then release the locks again.
10097          */
10098
10099         while (!trace_empty(&iter)) {
10100
10101                 if (!cnt)
10102                         printk(KERN_TRACE "---------------------------------\n");
10103
10104                 cnt++;
10105
10106                 trace_iterator_reset(&iter);
10107                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10108
10109                 if (trace_find_next_entry_inc(&iter) != NULL) {
10110                         int ret;
10111
10112                         ret = print_trace_line(&iter);
10113                         if (ret != TRACE_TYPE_NO_CONSUME)
10114                                 trace_consume(&iter);
10115                 }
10116                 touch_nmi_watchdog();
10117
10118                 trace_printk_seq(&iter.seq);
10119         }
10120
10121         if (!cnt)
10122                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10123         else
10124                 printk(KERN_TRACE "---------------------------------\n");
10125
10126  out_enable:
10127         tr->trace_flags |= old_userobj;
10128
10129         for_each_tracing_cpu(cpu) {
10130                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10131         }
10132         atomic_dec(&dump_running);
10133         local_irq_restore(flags);
10134 }
10135 EXPORT_SYMBOL_GPL(ftrace_dump);
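/*
 * Illustrative usage sketch (editorial example): code that detects a
 * fatal inconsistency can dump the global trace buffers to the console,
 * e.g. (the broken_state flag is made up for illustration):
 *
 *	if (WARN_ON(broken_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG restricts the dump to the CPU executing the call and
 * DUMP_NONE dumps nothing, matching the switch statement above.
 */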
10136
10137 #define WRITE_BUFSIZE  4096
10138
10139 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10140                                 size_t count, loff_t *ppos,
10141                                 int (*createfn)(const char *))
10142 {
10143         char *kbuf, *buf, *tmp;
10144         int ret = 0;
10145         size_t done = 0;
10146         size_t size;
10147
10148         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10149         if (!kbuf)
10150                 return -ENOMEM;
10151
10152         while (done < count) {
10153                 size = count - done;
10154
10155                 if (size >= WRITE_BUFSIZE)
10156                         size = WRITE_BUFSIZE - 1;
10157
10158                 if (copy_from_user(kbuf, buffer + done, size)) {
10159                         ret = -EFAULT;
10160                         goto out;
10161                 }
10162                 kbuf[size] = '\0';
10163                 buf = kbuf;
10164                 do {
10165                         tmp = strchr(buf, '\n');
10166                         if (tmp) {
10167                                 *tmp = '\0';
10168                                 size = tmp - buf + 1;
10169                         } else {
10170                                 size = strlen(buf);
10171                                 if (done + size < count) {
10172                                         if (buf != kbuf)
10173                                                 break;
10174                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10175                                         pr_warn("Line length is too long: Should be less than %d\n",
10176                                                 WRITE_BUFSIZE - 2);
10177                                         ret = -EINVAL;
10178                                         goto out;
10179                                 }
10180                         }
10181                         done += size;
10182
10183                         /* Remove comments */
10184                         tmp = strchr(buf, '#');
10185
10186                         if (tmp)
10187                                 *tmp = '\0';
10188
10189                         ret = createfn(buf);
10190                         if (ret)
10191                                 goto out;
10192                         buf += size;
10193
10194                 } while (done < count);
10195         }
10196         ret = done;
10197
10198 out:
10199         kfree(kbuf);
10200
10201         return ret;
10202 }
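/*
 * Illustrative usage sketch (editorial example): trace_parse_run_command()
 * is meant to back the .write handler of a command file.  A hypothetical
 * caller (my_write(), my_create_cmd() and process_one_cmd() are made-up
 * names) would look roughly like:
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		if (!raw_command[0])
 *			return 0;	(blank/comment-only lines still arrive)
 *		return process_one_cmd(raw_command);
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *buffer,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_cmd);
 *	}
 */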
10203
10204 __init static int tracer_alloc_buffers(void)
10205 {
10206         int ring_buf_size;
10207         int ret = -ENOMEM;
10208
10209
10210         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10211                 pr_warn("Tracing disabled due to lockdown\n");
10212                 return -EPERM;
10213         }
10214
10215         /*
10216          * Make sure we don't accidentally add more trace options
10217          * than we have bits for.
10218          */
10219         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10220
10221         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10222                 goto out;
10223
10224         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10225                 goto out_free_buffer_mask;
10226
10227         /* Only allocate trace_printk buffers if a trace_printk exists */
10228         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10229                 /* Must be called before global_trace.buffer is allocated */
10230                 trace_printk_init_buffers();
10231
10232         /* To save memory, keep the ring buffer size to its minimum */
10233         if (ring_buffer_expanded)
10234                 ring_buf_size = trace_buf_size;
10235         else
10236                 ring_buf_size = 1;
10237
10238         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10239         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10240
10241         raw_spin_lock_init(&global_trace.start_lock);
10242
10243         /*
10244          * The prepare callback allocates some memory for the ring buffer. We
10245          * don't free the buffer if the CPU goes down. If we were to free
10246          * the buffer, then the user would lose any trace that was in the
10247          * buffer. The memory will be removed once the "instance" is removed.
10248          */
10249         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10250                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10251                                       NULL);
10252         if (ret < 0)
10253                 goto out_free_cpumask;
10254         /* Used for event triggers */
10255         ret = -ENOMEM;
10256         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10257         if (!temp_buffer)
10258                 goto out_rm_hp_state;
10259
10260         if (trace_create_savedcmd() < 0)
10261                 goto out_free_temp_buffer;
10262
10263         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10264                 goto out_free_savedcmd;
10265
10266         /* TODO: make the number of buffers hot pluggable with CPUs */
10267         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10268                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10269                 goto out_free_pipe_cpumask;
10270         }
10271         if (global_trace.buffer_disabled)
10272                 tracing_off();
10273
10274         if (trace_boot_clock) {
10275                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10276                 if (ret < 0)
10277                         pr_warn("Trace clock %s not defined, going back to default\n",
10278                                 trace_boot_clock);
10279         }
10280
10281         /*
10282          * register_tracer() might reference current_trace, so it
10283          * needs to be set before we register anything. This is
10284          * just a bootstrap of current_trace anyway.
10285          */
10286         global_trace.current_trace = &nop_trace;
10287
10288         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10289
10290         ftrace_init_global_array_ops(&global_trace);
10291
10292         init_trace_flags_index(&global_trace);
10293
10294         register_tracer(&nop_trace);
10295
10296         /* Function tracing may start here (via kernel command line) */
10297         init_function_trace();
10298
10299         /* All seems OK, enable tracing */
10300         tracing_disabled = 0;
10301
10302         atomic_notifier_chain_register(&panic_notifier_list,
10303                                        &trace_panic_notifier);
10304
10305         register_die_notifier(&trace_die_notifier);
10306
10307         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10308
10309         INIT_LIST_HEAD(&global_trace.systems);
10310         INIT_LIST_HEAD(&global_trace.events);
10311         INIT_LIST_HEAD(&global_trace.hist_vars);
10312         INIT_LIST_HEAD(&global_trace.err_log);
10313         list_add(&global_trace.list, &ftrace_trace_arrays);
10314
10315         apply_trace_boot_options();
10316
10317         register_snapshot_cmd();
10318
10319         test_can_verify();
10320
10321         return 0;
10322
10323 out_free_pipe_cpumask:
10324         free_cpumask_var(global_trace.pipe_cpumask);
10325 out_free_savedcmd:
10326         free_saved_cmdlines_buffer(savedcmd);
10327 out_free_temp_buffer:
10328         ring_buffer_free(temp_buffer);
10329 out_rm_hp_state:
10330         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10331 out_free_cpumask:
10332         free_cpumask_var(global_trace.tracing_cpumask);
10333 out_free_buffer_mask:
10334         free_cpumask_var(tracing_buffer_mask);
10335 out:
10336         return ret;
10337 }
10338
10339 void __init early_trace_init(void)
10340 {
10341         if (tracepoint_printk) {
10342                 tracepoint_print_iter =
10343                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10344                 if (MEM_FAIL(!tracepoint_print_iter,
10345                              "Failed to allocate trace iterator\n"))
10346                         tracepoint_printk = 0;
10347                 else
10348                         static_key_enable(&tracepoint_printk_key.key);
10349         }
10350         tracer_alloc_buffers();
10351
10352         init_events();
10353 }
10354
10355 void __init trace_init(void)
10356 {
10357         trace_event_init();
10358 }
10359
10360 __init static void clear_boot_tracer(void)
10361 {
10362         /*
10363          * The default boot-up tracer buffer lives in an init section.
10364          * This function is called at late_initcall time. If we did not
10365          * find the boot tracer, then clear it out, to prevent
10366          * later registration from accessing the buffer that is
10367          * about to be freed.
10368          */
10369         if (!default_bootup_tracer)
10370                 return;
10371
10372         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10373                default_bootup_tracer);
10374         default_bootup_tracer = NULL;
10375 }
10376
10377 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10378 __init static void tracing_set_default_clock(void)
10379 {
10380         /* sched_clock_stable() is determined in late_initcall */
10381         if (!trace_boot_clock && !sched_clock_stable()) {
10382                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10383                         pr_warn("Can not set tracing clock due to lockdown\n");
10384                         return;
10385                 }
10386
10387                 printk(KERN_WARNING
10388                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10389                        "If you want to keep using the local clock, then add:\n"
10390                        "  \"trace_clock=local\"\n"
10391                        "on the kernel command line\n");
10392                 tracing_set_clock(&global_trace, "global");
10393         }
10394 }
10395 #else
10396 static inline void tracing_set_default_clock(void) { }
10397 #endif
10398
10399 __init static int late_trace_init(void)
10400 {
10401         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10402                 static_key_disable(&tracepoint_printk_key.key);
10403                 tracepoint_printk = 0;
10404         }
10405
10406         tracing_set_default_clock();
10407         clear_boot_tracer();
10408         return 0;
10409 }
10410
10411 late_initcall_sync(late_trace_init);