GNU Linux-libre 5.13.14-gnu1
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and turns to zero if the initialization
112  * of the tracer is successful; that is the only place that sets
113  * it back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
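
/*
 * For illustration: assuming a module saved two eval maps, the array
 * described above would be laid out roughly as
 *
 *	item[0]  head:  .mod = owning module, .length = 2
 *	item[1]  map:   first saved trace_eval_map
 *	item[2]  map:   second saved trace_eval_map
 *	item[3]  tail:  .next = pointer to the next saved array, or NULL
 */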
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
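
/*
 * For example, based on the parser above, the kernel command line can
 * contain either of:
 *
 *	ftrace_dump_on_oops		(dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the buffer of the CPU
 *					 that triggered the oops)
 */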
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
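
/*
 * For example, the +500 above rounds to the nearest microsecond:
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */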
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are adding the export to the list, but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included in the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
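
/*
 * A minimal usage sketch (the "my_" names are illustrative, and the
 * callback signature is assumed to match struct trace_export in
 * <linux/trace.h>): the write() callback receives each raw trace entry
 * and its size, and the flags select which kinds of records are fed to
 * the export.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_MARKER,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */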
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
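
/*
 * For example: with filtered_pids = {1, 2} and filtered_no_pids empty,
 * only tasks 1 and 2 are traced. With filtered_pids empty and
 * filtered_no_pids = {3}, every task except task 3 is traced.
 */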
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
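
/*
 * A sketch of how the three helpers above are typically wired into a
 * seq_file (the p_*() names are illustrative; real users also take the
 * locks they need and look up their own pid_list in ->start()):
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,	// calls trace_pid_start()
 *		.next	= p_next,	// calls trace_pid_next()
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */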
667
668 /* 128 (PID_BUF_SIZE + 1, used for the parser) should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always create a new array. The write is an all-or-nothing
689          * operation: pids added by the user go into a fresh array,
690          * and if the operation fails, the current list is not
691          * modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
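
/*
 * This helper backs pid filter files such as set_event_pid and
 * set_ftrace_pid. For example, from user space:
 *
 *	# echo 123 456 > /sys/kernel/debug/tracing/set_event_pid
 *	# echo 789 >> /sys/kernel/debug/tracing/set_event_pid
 *	# echo > /sys/kernel/debug/tracing/set_event_pid
 *
 * The first write replaces the list, the append adds to it, and an
 * empty write clears it.
 */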
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" so it can be used in fast paths, such
790  * as by the irqsoff tracer, but it may be inaccurate due to races. If
791  * you need to know the accurate state, use tracing_is_on(), which is a
792  * little slower but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If a dump on oops happens, you will appreciate not having to
812  * wait for all that output. In any case, this is configurable at
813  * both boot time and run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * serialize the access of the ring buffer
829  *
830  * The ring buffer serializes readers, but that is only low-level
831  * protection. The validity of the events (returned by
832  * ring_buffer_peek() and friends) is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow other processes
835  * to consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the events producer.
839  *   B) the page of the consumed events may become a page for
840  *      splice_read, and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different
843  * per-CPU ring buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races where it gets disabled while we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
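
/*
 * Callers normally use the trace_puts() macro rather than calling this
 * directly; for a string literal it maps to __trace_bputs() below,
 * otherwise to __trace_puts(). For example:
 *
 *	trace_puts("reached the slow path\n");
 */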
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string whose pointer is written into the buffer
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, snapshot can not be used when the tracer uses it */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot, either with
1142  * tracing_snapshot_alloc(), or manually with:
1143  * echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, calling this will stop
1146  * tracing, basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional: the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true. That means the trace array's cond_snapshot update()
1165  * operation used the cond_data to determine whether the snapshot
1166  * should be taken, and, if it was, presumably saved the data along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already taken.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want to preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
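
/*
 * A typical in-kernel pattern, as a sketch: allocate the snapshot buffer
 * once from a context that may sleep, then take snapshots later:
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		tracing_snapshot();	// swap live buffer and snapshot
 *	}
 *
 * Or simply call tracing_snapshot_alloc() where sleeping is allowed.
 */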
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
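
/*
 * A usage sketch (my_update(), my_data and some_condition_holds() are
 * illustrative, and the callback signature is assumed to follow
 * cond_update_fn_t from trace.h): the update callback decides, on each
 * call, whether the snapshot is actually taken.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return some_condition_holds(cond_data);
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */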
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races where it gets disabled while we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
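
/*
 * For example, on the kernel command line (memparse() accepts K, M and
 * G suffixes, and the size applies to each CPU's buffer):
 *
 *	trace_buf_size=1M
 */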
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
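
/*
 * The clock is selected by name from the table above, either at boot
 * with the "trace_clock=" option (see set_trace_boot_clock() earlier in
 * this file) or at run time, e.g.:
 *
 *	# echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * Reading the trace_clock file lists the available clocks with the
 * current one in brackets.
 */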
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * If the parser has not finished with the last write,
1607          * continue reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got finished input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
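/*
 * Illustrative sketch (not part of this file): trace_get_user() backs the
 * write handlers of tracefs control files such as set_ftrace_filter, so a
 * single write of space-separated names is consumed one token per call.
 * Assuming the usual tracefs mount point, a userspace feeder could be:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *names = "schedule vfs_read vfs_write\n";
 *		int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, names, strlen(names));	// parsed one name at a time
 *		close(fd);
 *		return 0;
 *	}
 */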
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
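/*
 * Illustrative sketch (not part of this file): the irq_work + workqueue
 * indirection above exists so that a userspace monitor can sleep in
 * inotify and be woken when tracing_max_latency changes, instead of
 * polling the file. Assuming the usual tracefs mount point:
 *
 *	#include <stdio.h>
 *	#include <sys/inotify.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		int fd = inotify_init();
 *
 *		if (fd < 0)
 *			return 1;
 *		inotify_add_watch(fd, "/sys/kernel/tracing/tracing_max_latency",
 *				  IN_MODIFY);
 *		while (read(fd, buf, sizeof(buf)) > 0)
 *			printf("tracing_max_latency was updated\n");
 *		return 0;
 *	}
 */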
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure (this way the maximum trace is permanently saved
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency).
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* Record this task's comm */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from @tr to max_tr
1794  * @tr: trace array with the buffers to snapshot
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
1836
1837 /**
1838  * update_max_tr_single - swap a single CPU's buffer with max_tr
1839  * @tr: trace array with the buffer to swap
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record the max trace, but
1868                  * we write a note into the max buffer (no one writes
1869                  * directly to it) to flag that the swap failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static, they should be filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftests yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * let's make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /* allocate a dummy tracer_flags */
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* Disable other selftests, since running this tracer will break them. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
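/*
 * Illustrative sketch (not part of this file): since register_tracer() is
 * __init, a new tracer must be built into the kernel and registered from an
 * initcall. Under that assumption the usual shape is roughly:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,		// hypothetical callback
 *		.reset	= example_tracer_reset,		// hypothetical callback
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * register_tracer() rejects duplicate or over-long names, runs the optional
 * ->selftest, and then links the tracer into trace_types so it shows up in
 * available_tracers.
 */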
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 /*
2188  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2189  * is the tgid last observed corresponding to pid=i.
2190  */
2191 static int *tgid_map;
2192
2193 /* The maximum valid index into tgid_map. */
2194 static size_t tgid_map_max;
2195
2196 #define SAVED_CMDLINES_DEFAULT 128
2197 #define NO_CMDLINE_MAP UINT_MAX
2198 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2199 struct saved_cmdlines_buffer {
2200         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2201         unsigned *map_cmdline_to_pid;
2202         unsigned cmdline_num;
2203         int cmdline_idx;
2204         char *saved_cmdlines;
2205 };
2206 static struct saved_cmdlines_buffer *savedcmd;
2207
2208 static inline char *get_saved_cmdlines(int idx)
2209 {
2210         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2211 }
2212
2213 static inline void set_cmdline(int idx, const char *cmdline)
2214 {
2215         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2216 }
2217
2218 static int allocate_cmdlines_buffer(unsigned int val,
2219                                     struct saved_cmdlines_buffer *s)
2220 {
2221         s->map_cmdline_to_pid = kmalloc_array(val,
2222                                               sizeof(*s->map_cmdline_to_pid),
2223                                               GFP_KERNEL);
2224         if (!s->map_cmdline_to_pid)
2225                 return -ENOMEM;
2226
2227         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2228         if (!s->saved_cmdlines) {
2229                 kfree(s->map_cmdline_to_pid);
2230                 return -ENOMEM;
2231         }
2232
2233         s->cmdline_idx = 0;
2234         s->cmdline_num = val;
2235         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2236                sizeof(s->map_pid_to_cmdline));
2237         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2238                val * sizeof(*s->map_cmdline_to_pid));
2239
2240         return 0;
2241 }
2242
2243 static int trace_create_savedcmd(void)
2244 {
2245         int ret;
2246
2247         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2248         if (!savedcmd)
2249                 return -ENOMEM;
2250
2251         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2252         if (ret < 0) {
2253                 kfree(savedcmd);
2254                 savedcmd = NULL;
2255                 return -ENOMEM;
2256         }
2257
2258         return 0;
2259 }
2260
2261 int is_tracing_stopped(void)
2262 {
2263         return global_trace.stop_count;
2264 }
2265
2266 /**
2267  * tracing_start - quick start of the tracer
2268  *
2269  * If tracing is enabled but was stopped by tracing_stop,
2270  * this will start the tracer back up.
2271  */
2272 void tracing_start(void)
2273 {
2274         struct trace_buffer *buffer;
2275         unsigned long flags;
2276
2277         if (tracing_disabled)
2278                 return;
2279
2280         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2281         if (--global_trace.stop_count) {
2282                 if (global_trace.stop_count < 0) {
2283                         /* Someone screwed up their debugging */
2284                         WARN_ON_ONCE(1);
2285                         global_trace.stop_count = 0;
2286                 }
2287                 goto out;
2288         }
2289
2290         /* Prevent the buffers from switching */
2291         arch_spin_lock(&global_trace.max_lock);
2292
2293         buffer = global_trace.array_buffer.buffer;
2294         if (buffer)
2295                 ring_buffer_record_enable(buffer);
2296
2297 #ifdef CONFIG_TRACER_MAX_TRACE
2298         buffer = global_trace.max_buffer.buffer;
2299         if (buffer)
2300                 ring_buffer_record_enable(buffer);
2301 #endif
2302
2303         arch_spin_unlock(&global_trace.max_lock);
2304
2305  out:
2306         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2307 }
2308
2309 static void tracing_start_tr(struct trace_array *tr)
2310 {
2311         struct trace_buffer *buffer;
2312         unsigned long flags;
2313
2314         if (tracing_disabled)
2315                 return;
2316
2317         /* If global, we need to also start the max tracer */
2318         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2319                 return tracing_start();
2320
2321         raw_spin_lock_irqsave(&tr->start_lock, flags);
2322
2323         if (--tr->stop_count) {
2324                 if (tr->stop_count < 0) {
2325                         /* Someone screwed up their debugging */
2326                         WARN_ON_ONCE(1);
2327                         tr->stop_count = 0;
2328                 }
2329                 goto out;
2330         }
2331
2332         buffer = tr->array_buffer.buffer;
2333         if (buffer)
2334                 ring_buffer_record_enable(buffer);
2335
2336  out:
2337         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2338 }
2339
2340 /**
2341  * tracing_stop - quick stop of the tracer
2342  *
2343  * Light weight way to stop tracing. Use in conjunction with
2344  * tracing_start.
2345  */
2346 void tracing_stop(void)
2347 {
2348         struct trace_buffer *buffer;
2349         unsigned long flags;
2350
2351         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2352         if (global_trace.stop_count++)
2353                 goto out;
2354
2355         /* Prevent the buffers from switching */
2356         arch_spin_lock(&global_trace.max_lock);
2357
2358         buffer = global_trace.array_buffer.buffer;
2359         if (buffer)
2360                 ring_buffer_record_disable(buffer);
2361
2362 #ifdef CONFIG_TRACER_MAX_TRACE
2363         buffer = global_trace.max_buffer.buffer;
2364         if (buffer)
2365                 ring_buffer_record_disable(buffer);
2366 #endif
2367
2368         arch_spin_unlock(&global_trace.max_lock);
2369
2370  out:
2371         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2372 }
2373
2374 static void tracing_stop_tr(struct trace_array *tr)
2375 {
2376         struct trace_buffer *buffer;
2377         unsigned long flags;
2378
2379         /* If global, we need to also stop the max tracer */
2380         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2381                 return tracing_stop();
2382
2383         raw_spin_lock_irqsave(&tr->start_lock, flags);
2384         if (tr->stop_count++)
2385                 goto out;
2386
2387         buffer = tr->array_buffer.buffer;
2388         if (buffer)
2389                 ring_buffer_record_disable(buffer);
2390
2391  out:
2392         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2393 }
2394
2395 static int trace_save_cmdline(struct task_struct *tsk)
2396 {
2397         unsigned tpid, idx;
2398
2399         /* treat recording of idle task as a success */
2400         if (!tsk->pid)
2401                 return 1;
2402
2403         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2404
2405         /*
2406          * It's not the end of the world if we don't get
2407          * the lock, but we also don't want to spin
2408          * nor do we want to disable interrupts,
2409          * so if we miss here, then better luck next time.
2410          */
2411         if (!arch_spin_trylock(&trace_cmdline_lock))
2412                 return 0;
2413
2414         idx = savedcmd->map_pid_to_cmdline[tpid];
2415         if (idx == NO_CMDLINE_MAP) {
2416                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2417
2418                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2419                 savedcmd->cmdline_idx = idx;
2420         }
2421
2422         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2423         set_cmdline(idx, tsk->comm);
2424
2425         arch_spin_unlock(&trace_cmdline_lock);
2426
2427         return 1;
2428 }
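/*
 * Worked example for the masking above, assuming the default PID_MAX_DEFAULT
 * of 0x8000: pid 40000 maps to tpid 40000 & 0x7fff = 7232, so it shares a
 * map_pid_to_cmdline slot with pid 7232. map_cmdline_to_pid[] keeps the full
 * pid, which is how __trace_find_cmdline() below detects such a collision
 * and falls back to "<...>".
 */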
2429
2430 static void __trace_find_cmdline(int pid, char comm[])
2431 {
2432         unsigned map;
2433         int tpid;
2434
2435         if (!pid) {
2436                 strcpy(comm, "<idle>");
2437                 return;
2438         }
2439
2440         if (WARN_ON_ONCE(pid < 0)) {
2441                 strcpy(comm, "<XXX>");
2442                 return;
2443         }
2444
2445         tpid = pid & (PID_MAX_DEFAULT - 1);
2446         map = savedcmd->map_pid_to_cmdline[tpid];
2447         if (map != NO_CMDLINE_MAP) {
2448                 tpid = savedcmd->map_cmdline_to_pid[map];
2449                 if (tpid == pid) {
2450                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2451                         return;
2452                 }
2453         }
2454         strcpy(comm, "<...>");
2455 }
2456
2457 void trace_find_cmdline(int pid, char comm[])
2458 {
2459         preempt_disable();
2460         arch_spin_lock(&trace_cmdline_lock);
2461
2462         __trace_find_cmdline(pid, comm);
2463
2464         arch_spin_unlock(&trace_cmdline_lock);
2465         preempt_enable();
2466 }
2467
2468 static int *trace_find_tgid_ptr(int pid)
2469 {
2470         /*
2471          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2472          * if we observe a non-NULL tgid_map then we also observe the correct
2473          * tgid_map_max.
2474          */
2475         int *map = smp_load_acquire(&tgid_map);
2476
2477         if (unlikely(!map || pid > tgid_map_max))
2478                 return NULL;
2479
2480         return &map[pid];
2481 }
2482
2483 int trace_find_tgid(int pid)
2484 {
2485         int *ptr = trace_find_tgid_ptr(pid);
2486
2487         return ptr ? *ptr : 0;
2488 }
2489
2490 static int trace_save_tgid(struct task_struct *tsk)
2491 {
2492         int *ptr;
2493
2494         /* treat recording of idle task as a success */
2495         if (!tsk->pid)
2496                 return 1;
2497
2498         ptr = trace_find_tgid_ptr(tsk->pid);
2499         if (!ptr)
2500                 return 0;
2501
2502         *ptr = tsk->tgid;
2503         return 1;
2504 }
2505
2506 static bool tracing_record_taskinfo_skip(int flags)
2507 {
2508         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2509                 return true;
2510         if (!__this_cpu_read(trace_taskinfo_save))
2511                 return true;
2512         return false;
2513 }
2514
2515 /**
2516  * tracing_record_taskinfo - record the task info of a task
2517  *
2518  * @task:  task to record
2519  * @flags: TRACE_RECORD_CMDLINE for recording comm
2520  *         TRACE_RECORD_TGID for recording tgid
2521  */
2522 void tracing_record_taskinfo(struct task_struct *task, int flags)
2523 {
2524         bool done;
2525
2526         if (tracing_record_taskinfo_skip(flags))
2527                 return;
2528
2529         /*
2530          * Record as much task information as possible. If some fail, continue
2531          * to try to record the others.
2532          */
2533         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2534         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2535
2536         /* If recording any information failed, try again soon. */
2537         if (!done)
2538                 return;
2539
2540         __this_cpu_write(trace_taskinfo_save, false);
2541 }
2542
2543 /**
2544  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2545  *
2546  * @prev: previous task during sched_switch
2547  * @next: next task during sched_switch
2548  * @flags: TRACE_RECORD_CMDLINE for recording comm
2549  *         TRACE_RECORD_TGID for recording tgid
2550  */
2551 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2552                                           struct task_struct *next, int flags)
2553 {
2554         bool done;
2555
2556         if (tracing_record_taskinfo_skip(flags))
2557                 return;
2558
2559         /*
2560          * Record as much task information as possible. If some fail, continue
2561          * to try to record the others.
2562          */
2563         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2564         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2565         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2566         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2567
2568         /* If recording any information failed, try again soon. */
2569         if (!done)
2570                 return;
2571
2572         __this_cpu_write(trace_taskinfo_save, false);
2573 }
2574
2575 /* Helpers to record specific task information */
2576 void tracing_record_cmdline(struct task_struct *task)
2577 {
2578         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2579 }
2580
2581 void tracing_record_tgid(struct task_struct *task)
2582 {
2583         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2584 }
2585
2586 /*
2587  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2588  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2589  * simplifies those functions and keeps them in sync.
2590  */
2591 enum print_line_t trace_handle_return(struct trace_seq *s)
2592 {
2593         return trace_seq_has_overflowed(s) ?
2594                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2595 }
2596 EXPORT_SYMBOL_GPL(trace_handle_return);
2597
2598 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2599 {
2600         unsigned int trace_flags = irqs_status;
2601         unsigned int pc;
2602
2603         pc = preempt_count();
2604
2605         if (pc & NMI_MASK)
2606                 trace_flags |= TRACE_FLAG_NMI;
2607         if (pc & HARDIRQ_MASK)
2608                 trace_flags |= TRACE_FLAG_HARDIRQ;
2609         if (in_serving_softirq())
2610                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2611
2612         if (tif_need_resched())
2613                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2614         if (test_preempt_need_resched())
2615                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2616         return (trace_flags << 16) | (pc & 0xff);
2617 }
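/*
 * Layout note (illustrative): the value returned above packs the context
 * into a single trace_ctx word, with the TRACE_FLAG_* bits in bits 16 and
 * up and the low byte of preempt_count() in bits 0-7. A consumer would
 * unpack it with something like:
 *
 *	unsigned int pc    = trace_ctx & 0xff;
 *	unsigned int flags = trace_ctx >> 16;
 *
 * i.e. the inverse of the "(trace_flags << 16) | (pc & 0xff)" above.
 */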
2618
2619 struct ring_buffer_event *
2620 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2621                           int type,
2622                           unsigned long len,
2623                           unsigned int trace_ctx)
2624 {
2625         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2626 }
2627
2628 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2629 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2630 static int trace_buffered_event_ref;
2631
2632 /**
2633  * trace_buffered_event_enable - enable buffering events
2634  *
2635  * When events are being filtered, it is quicker to use a temporary
2636  * buffer to write the event data into if there's a likely chance
2637  * that it will not be committed. Discarding an event from the ring
2638  * buffer is not as fast as committing one, and is much slower than
2639  * copying the data into a commit.
2640  *
2641  * When an event is to be filtered, allocate per-cpu buffers to
2642  * write the event data into. If the event is then filtered and
2643  * discarded, it is simply dropped; otherwise, the entire data is
2644  * committed in one shot.
2645  */
2646 void trace_buffered_event_enable(void)
2647 {
2648         struct ring_buffer_event *event;
2649         struct page *page;
2650         int cpu;
2651
2652         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2653
2654         if (trace_buffered_event_ref++)
2655                 return;
2656
2657         for_each_tracing_cpu(cpu) {
2658                 page = alloc_pages_node(cpu_to_node(cpu),
2659                                         GFP_KERNEL | __GFP_NORETRY, 0);
2660                 if (!page)
2661                         goto failed;
2662
2663                 event = page_address(page);
2664                 memset(event, 0, sizeof(*event));
2665
2666                 per_cpu(trace_buffered_event, cpu) = event;
2667
2668                 preempt_disable();
2669                 if (cpu == smp_processor_id() &&
2670                     __this_cpu_read(trace_buffered_event) !=
2671                     per_cpu(trace_buffered_event, cpu))
2672                         WARN_ON_ONCE(1);
2673                 preempt_enable();
2674         }
2675
2676         return;
2677  failed:
2678         trace_buffered_event_disable();
2679 }
2680
2681 static void enable_trace_buffered_event(void *data)
2682 {
2683         /* Probably not needed, but do it anyway */
2684         smp_rmb();
2685         this_cpu_dec(trace_buffered_event_cnt);
2686 }
2687
2688 static void disable_trace_buffered_event(void *data)
2689 {
2690         this_cpu_inc(trace_buffered_event_cnt);
2691 }
2692
2693 /**
2694  * trace_buffered_event_disable - disable buffering events
2695  *
2696  * When a filter is removed, it is faster to not use the buffered
2697  * events, and to commit directly into the ring buffer. Free up
2698  * the temp buffers when there are no more users. This requires
2699  * special synchronization with current events.
2700  */
2701 void trace_buffered_event_disable(void)
2702 {
2703         int cpu;
2704
2705         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2706
2707         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2708                 return;
2709
2710         if (--trace_buffered_event_ref)
2711                 return;
2712
2713         preempt_disable();
2714         /* For each CPU, set the buffer as used. */
2715         smp_call_function_many(tracing_buffer_mask,
2716                                disable_trace_buffered_event, NULL, 1);
2717         preempt_enable();
2718
2719         /* Wait for all current users to finish */
2720         synchronize_rcu();
2721
2722         for_each_tracing_cpu(cpu) {
2723                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2724                 per_cpu(trace_buffered_event, cpu) = NULL;
2725         }
2726         /*
2727          * Make sure trace_buffered_event is NULL before clearing
2728          * trace_buffered_event_cnt.
2729          */
2730         smp_wmb();
2731
2732         preempt_disable();
2733         /* Do the work on each cpu */
2734         smp_call_function_many(tracing_buffer_mask,
2735                                enable_trace_buffered_event, NULL, 1);
2736         preempt_enable();
2737 }
2738
2739 static struct trace_buffer *temp_buffer;
2740
2741 struct ring_buffer_event *
2742 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2743                           struct trace_event_file *trace_file,
2744                           int type, unsigned long len,
2745                           unsigned int trace_ctx)
2746 {
2747         struct ring_buffer_event *entry;
2748         struct trace_array *tr = trace_file->tr;
2749         int val;
2750
2751         *current_rb = tr->array_buffer.buffer;
2752
2753         if (!tr->no_filter_buffering_ref &&
2754             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2755             (entry = this_cpu_read(trace_buffered_event))) {
2756                 /* Try to use the per cpu buffer first */
2757                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2758                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2759                         trace_event_setup(entry, type, trace_ctx);
2760                         entry->array[0] = len;
2761                         return entry;
2762                 }
2763                 this_cpu_dec(trace_buffered_event_cnt);
2764         }
2765
2766         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2767                                             trace_ctx);
2768         /*
2769          * If tracing is off, but we have triggers enabled,
2770          * we still need to look at the event data. Use the temp_buffer
2771          * to store the trace event for the trigger to use. It's recursion
2772          * safe and will not be recorded anywhere.
2773          */
2774         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2775                 *current_rb = temp_buffer;
2776                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2777                                                     trace_ctx);
2778         }
2779         return entry;
2780 }
2781 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2782
2783 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2784 static DEFINE_MUTEX(tracepoint_printk_mutex);
2785
2786 static void output_printk(struct trace_event_buffer *fbuffer)
2787 {
2788         struct trace_event_call *event_call;
2789         struct trace_event_file *file;
2790         struct trace_event *event;
2791         unsigned long flags;
2792         struct trace_iterator *iter = tracepoint_print_iter;
2793
2794         /* We should never get here if iter is NULL */
2795         if (WARN_ON_ONCE(!iter))
2796                 return;
2797
2798         event_call = fbuffer->trace_file->event_call;
2799         if (!event_call || !event_call->event.funcs ||
2800             !event_call->event.funcs->trace)
2801                 return;
2802
2803         file = fbuffer->trace_file;
2804         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2805             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2806              !filter_match_preds(file->filter, fbuffer->entry)))
2807                 return;
2808
2809         event = &fbuffer->trace_file->event_call->event;
2810
2811         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2812         trace_seq_init(&iter->seq);
2813         iter->ent = fbuffer->entry;
2814         event_call->event.funcs->trace(iter, 0, event);
2815         trace_seq_putc(&iter->seq, 0);
2816         printk("%s", iter->seq.buffer);
2817
2818         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2819 }
2820
2821 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2822                              void *buffer, size_t *lenp,
2823                              loff_t *ppos)
2824 {
2825         int save_tracepoint_printk;
2826         int ret;
2827
2828         mutex_lock(&tracepoint_printk_mutex);
2829         save_tracepoint_printk = tracepoint_printk;
2830
2831         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2832
2833         /*
2834          * This will force exiting early, as tracepoint_printk
2835          * is always zero when tracepoint_print_iter is not allocated.
2836          */
2837         if (!tracepoint_print_iter)
2838                 tracepoint_printk = 0;
2839
2840         if (save_tracepoint_printk == tracepoint_printk)
2841                 goto out;
2842
2843         if (tracepoint_printk)
2844                 static_key_enable(&tracepoint_printk_key.key);
2845         else
2846                 static_key_disable(&tracepoint_printk_key.key);
2847
2848  out:
2849         mutex_unlock(&tracepoint_printk_mutex);
2850
2851         return ret;
2852 }
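/*
 * Illustrative sketch (not part of this file): the handler above is wired to
 * the kernel.tracepoint_printk sysctl, so toggling it from userspace is
 * roughly (path assumed to be the usual procfs layout):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		FILE *f = fopen("/proc/sys/kernel/tracepoint_printk", "w");
 *
 *		if (!f)
 *			return 1;
 *		fputs("1", f);		// mirror tracepoints to printk
 *		return fclose(f) ? 1 : 0;
 *	}
 *
 * Note that this only takes effect when the tp_printk boot parameter set up
 * tracepoint_print_iter; otherwise the handler forces the value back to 0.
 */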
2853
2854 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2855 {
2856         enum event_trigger_type tt = ETT_NONE;
2857         struct trace_event_file *file = fbuffer->trace_file;
2858
2859         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2860                         fbuffer->entry, &tt))
2861                 goto discard;
2862
2863         if (static_key_false(&tracepoint_printk_key.key))
2864                 output_printk(fbuffer);
2865
2866         if (static_branch_unlikely(&trace_event_exports_enabled))
2867                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2868
2869         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2870                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2871
2872 discard:
2873         if (tt)
2874                 event_triggers_post_call(file, tt);
2875
2876 }
2877 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2878
2879 /*
2880  * Skip 3:
2881  *
2882  *   trace_buffer_unlock_commit_regs()
2883  *   trace_event_buffer_commit()
2884  *   trace_event_raw_event_xxx()
2885  */
2886 # define STACK_SKIP 3
2887
2888 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2889                                      struct trace_buffer *buffer,
2890                                      struct ring_buffer_event *event,
2891                                      unsigned int trace_ctx,
2892                                      struct pt_regs *regs)
2893 {
2894         __buffer_unlock_commit(buffer, event);
2895
2896         /*
2897          * If regs is not set, then skip the necessary functions.
2898          * Note, we can still get here via blktrace, wakeup tracer
2899          * and mmiotrace, but that's ok if they lose a function or
2900          * two. They are not that meaningful.
2901          */
2902         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2903         ftrace_trace_userstack(tr, buffer, trace_ctx);
2904 }
2905
2906 /*
2907  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2908  */
2909 void
2910 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2911                                    struct ring_buffer_event *event)
2912 {
2913         __buffer_unlock_commit(buffer, event);
2914 }
2915
2916 void
2917 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2918                parent_ip, unsigned int trace_ctx)
2919 {
2920         struct trace_event_call *call = &event_function;
2921         struct trace_buffer *buffer = tr->array_buffer.buffer;
2922         struct ring_buffer_event *event;
2923         struct ftrace_entry *entry;
2924
2925         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2926                                             trace_ctx);
2927         if (!event)
2928                 return;
2929         entry   = ring_buffer_event_data(event);
2930         entry->ip                       = ip;
2931         entry->parent_ip                = parent_ip;
2932
2933         if (!call_filter_check_discard(call, entry, buffer, event)) {
2934                 if (static_branch_unlikely(&trace_function_exports_enabled))
2935                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2936                 __buffer_unlock_commit(buffer, event);
2937         }
2938 }
2939
2940 #ifdef CONFIG_STACKTRACE
2941
2942 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2943 #define FTRACE_KSTACK_NESTING   4
2944
2945 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2946
2947 struct ftrace_stack {
2948         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2949 };
2950
2951
2952 struct ftrace_stacks {
2953         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2954 };
2955
2956 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2957 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2958
2959 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2960                                  unsigned int trace_ctx,
2961                                  int skip, struct pt_regs *regs)
2962 {
2963         struct trace_event_call *call = &event_kernel_stack;
2964         struct ring_buffer_event *event;
2965         unsigned int size, nr_entries;
2966         struct ftrace_stack *fstack;
2967         struct stack_entry *entry;
2968         int stackidx;
2969
2970         /*
2971          * Add one, for this function and the call to stack_trace_save().
2972          * If regs is set, then these functions will not be in the way.
2973          */
2974 #ifndef CONFIG_UNWINDER_ORC
2975         if (!regs)
2976                 skip++;
2977 #endif
2978
2979         preempt_disable_notrace();
2980
2981         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2982
2983         /* This should never happen. If it does, yell once and skip */
2984         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2985                 goto out;
2986
2987         /*
2988          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2989          * interrupt will either see the value pre increment or post
2990          * increment. If the interrupt happens pre increment it will have
2991          * restored the counter when it returns.  We just need a barrier to
2992          * keep gcc from moving things around.
2993          */
2994         barrier();
2995
2996         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2997         size = ARRAY_SIZE(fstack->calls);
2998
2999         if (regs) {
3000                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3001                                                    size, skip);
3002         } else {
3003                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3004         }
3005
3006         size = nr_entries * sizeof(unsigned long);
3007         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3008                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3009                                     trace_ctx);
3010         if (!event)
3011                 goto out;
3012         entry = ring_buffer_event_data(event);
3013
3014         memcpy(&entry->caller, fstack->calls, size);
3015         entry->size = nr_entries;
3016
3017         if (!call_filter_check_discard(call, entry, buffer, event))
3018                 __buffer_unlock_commit(buffer, event);
3019
3020  out:
3021         /* Again, don't let gcc optimize things here */
3022         barrier();
3023         __this_cpu_dec(ftrace_stack_reserve);
3024         preempt_enable_notrace();
3025
3026 }
3027
3028 static inline void ftrace_trace_stack(struct trace_array *tr,
3029                                       struct trace_buffer *buffer,
3030                                       unsigned int trace_ctx,
3031                                       int skip, struct pt_regs *regs)
3032 {
3033         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3034                 return;
3035
3036         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3037 }
3038
3039 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3040                    int skip)
3041 {
3042         struct trace_buffer *buffer = tr->array_buffer.buffer;
3043
3044         if (rcu_is_watching()) {
3045                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3046                 return;
3047         }
3048
3049         /*
3050          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3051          * but if the above rcu_is_watching() failed, then the NMI
3052          * triggered someplace critical, and rcu_irq_enter() should
3053          * not be called from NMI.
3054          */
3055         if (unlikely(in_nmi()))
3056                 return;
3057
3058         rcu_irq_enter_irqson();
3059         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3060         rcu_irq_exit_irqson();
3061 }
3062
3063 /**
3064  * trace_dump_stack - record a stack back trace in the trace buffer
3065  * @skip: Number of functions to skip (helper handlers)
3066  */
3067 void trace_dump_stack(int skip)
3068 {
3069         if (tracing_disabled || tracing_selftest_running)
3070                 return;
3071
3072 #ifndef CONFIG_UNWINDER_ORC
3073         /* Skip 1 to skip this function. */
3074         skip++;
3075 #endif
3076         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3077                              tracing_gen_ctx(), skip, NULL);
3078 }
3079 EXPORT_SYMBOL_GPL(trace_dump_stack);
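/*
 * Illustrative sketch (not part of this file): trace_dump_stack() is an
 * ad-hoc debugging aid for kernel code, recording a backtrace into the trace
 * buffer instead of the console. A hypothetical use from a driver:
 *
 *	static void example_suspect_path(void)
 *	{
 *		trace_dump_stack(0);	// record how we got here
 *	}
 *
 * The resulting stack appears in the trace output interleaved with the
 * surrounding events, so its ordering relative to them is preserved.
 */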
3080
3081 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3082 static DEFINE_PER_CPU(int, user_stack_count);
3083
3084 static void
3085 ftrace_trace_userstack(struct trace_array *tr,
3086                        struct trace_buffer *buffer, unsigned int trace_ctx)
3087 {
3088         struct trace_event_call *call = &event_user_stack;
3089         struct ring_buffer_event *event;
3090         struct userstack_entry *entry;
3091
3092         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3093                 return;
3094
3095         /*
3096          * NMIs can not handle page faults, even with fixups.
3097          * Saving the user stack can (and often does) fault.
3098          */
3099         if (unlikely(in_nmi()))
3100                 return;
3101
3102         /*
3103          * prevent recursion, since the user stack tracing may
3104          * trigger other kernel events.
3105          */
3106         preempt_disable();
3107         if (__this_cpu_read(user_stack_count))
3108                 goto out;
3109
3110         __this_cpu_inc(user_stack_count);
3111
3112         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3113                                             sizeof(*entry), trace_ctx);
3114         if (!event)
3115                 goto out_drop_count;
3116         entry   = ring_buffer_event_data(event);
3117
3118         entry->tgid             = current->tgid;
3119         memset(&entry->caller, 0, sizeof(entry->caller));
3120
3121         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3122         if (!call_filter_check_discard(call, entry, buffer, event))
3123                 __buffer_unlock_commit(buffer, event);
3124
3125  out_drop_count:
3126         __this_cpu_dec(user_stack_count);
3127  out:
3128         preempt_enable();
3129 }
3130 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3131 static void ftrace_trace_userstack(struct trace_array *tr,
3132                                    struct trace_buffer *buffer,
3133                                    unsigned int trace_ctx)
3134 {
3135 }
3136 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3137
3138 #endif /* CONFIG_STACKTRACE */
3139
3140 static inline void
3141 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3142                           unsigned long long delta)
3143 {
3144         entry->bottom_delta_ts = delta & U32_MAX;
3145         entry->top_delta_ts = (delta >> 32);
3146 }
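/*
 * Worked example (illustrative): the helper above stores the low 32 bits of
 * the delta in bottom_delta_ts and the remaining high bits in top_delta_ts,
 * so a reader reconstructs the timestamp delta as
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * i.e. the exact inverse of the two assignments above.
 */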
3147
3148 void trace_last_func_repeats(struct trace_array *tr,
3149                              struct trace_func_repeats *last_info,
3150                              unsigned int trace_ctx)
3151 {
3152         struct trace_buffer *buffer = tr->array_buffer.buffer;
3153         struct func_repeats_entry *entry;
3154         struct ring_buffer_event *event;
3155         u64 delta;
3156
3157         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3158                                             sizeof(*entry), trace_ctx);
3159         if (!event)
3160                 return;
3161
3162         delta = ring_buffer_event_time_stamp(buffer, event) -
3163                 last_info->ts_last_call;
3164
3165         entry = ring_buffer_event_data(event);
3166         entry->ip = last_info->ip;
3167         entry->parent_ip = last_info->parent_ip;
3168         entry->count = last_info->count;
3169         func_repeats_set_delta_ts(entry, delta);
3170
3171         __buffer_unlock_commit(buffer, event);
3172 }
3173
3174 /* created for use with alloc_percpu */
3175 struct trace_buffer_struct {
3176         int nesting;
3177         char buffer[4][TRACE_BUF_SIZE];
3178 };
3179
3180 static struct trace_buffer_struct *trace_percpu_buffer;
3181
3182 /*
3183  * This allows for lockless recording.  If we're nested too deeply, then
3184  * this returns NULL.
3185  */
3186 static char *get_trace_buf(void)
3187 {
3188         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3189
3190         if (!buffer || buffer->nesting >= 4)
3191                 return NULL;
3192
3193         buffer->nesting++;
3194
3195         /* Interrupts must see nesting incremented before we use the buffer */
3196         barrier();
3197         return &buffer->buffer[buffer->nesting - 1][0];
3198 }
3199
3200 static void put_trace_buf(void)
3201 {
3202         /* Don't let the decrement of nesting leak before this */
3203         barrier();
3204         this_cpu_dec(trace_percpu_buffer->nesting);
3205 }
3206
3207 static int alloc_percpu_trace_buffer(void)
3208 {
3209         struct trace_buffer_struct *buffers;
3210
3211         if (trace_percpu_buffer)
3212                 return 0;
3213
3214         buffers = alloc_percpu(struct trace_buffer_struct);
3215         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3216                 return -ENOMEM;
3217
3218         trace_percpu_buffer = buffers;
3219         return 0;
3220 }
3221
3222 static int buffers_allocated;
3223
3224 void trace_printk_init_buffers(void)
3225 {
3226         if (buffers_allocated)
3227                 return;
3228
3229         if (alloc_percpu_trace_buffer())
3230                 return;
3231
3232         /* trace_printk() is for debug use only. Don't use it in production. */
3233
3234         pr_warn("\n");
3235         pr_warn("**********************************************************\n");
3236         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3237         pr_warn("**                                                      **\n");
3238         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3239         pr_warn("**                                                      **\n");
3240         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3241         pr_warn("** unsafe for production use.                           **\n");
3242         pr_warn("**                                                      **\n");
3243         pr_warn("** If you see this message and you are not debugging    **\n");
3244         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3245         pr_warn("**                                                      **\n");
3246         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3247         pr_warn("**********************************************************\n");
3248
3249         /* Expand the buffers to set size */
3250         tracing_update_buffers();
3251
3252         buffers_allocated = 1;
3253
3254         /*
3255          * trace_printk_init_buffers() can be called by modules.
3256          * If that happens, then we need to start cmdline recording
3257          * directly here. If the global_trace.buffer is already
3258          * allocated at this point, then this was called by module code.
3259          */
3260         if (global_trace.array_buffer.buffer)
3261                 tracing_start_cmdline_record();
3262 }
3263 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3264
3265 void trace_printk_start_comm(void)
3266 {
3267         /* Start tracing comms if trace printk is set */
3268         if (!buffers_allocated)
3269                 return;
3270         tracing_start_cmdline_record();
3271 }
3272
3273 static void trace_printk_start_stop_comm(int enabled)
3274 {
3275         if (!buffers_allocated)
3276                 return;
3277
3278         if (enabled)
3279                 tracing_start_cmdline_record();
3280         else
3281                 tracing_stop_cmdline_record();
3282 }
3283
3284 /**
3285  * trace_vbprintk - write binary msg to tracing buffer
3286  * @ip:    The address of the caller
3287  * @fmt:   The string format to write to the buffer
3288  * @args:  Arguments for @fmt
3289  */
3290 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3291 {
3292         struct trace_event_call *call = &event_bprint;
3293         struct ring_buffer_event *event;
3294         struct trace_buffer *buffer;
3295         struct trace_array *tr = &global_trace;
3296         struct bprint_entry *entry;
3297         unsigned int trace_ctx;
3298         char *tbuffer;
3299         int len = 0, size;
3300
3301         if (unlikely(tracing_selftest_running || tracing_disabled))
3302                 return 0;
3303
3304         /* Don't pollute graph traces with trace_vprintk internals */
3305         pause_graph_tracing();
3306
3307         trace_ctx = tracing_gen_ctx();
3308         preempt_disable_notrace();
3309
3310         tbuffer = get_trace_buf();
3311         if (!tbuffer) {
3312                 len = 0;
3313                 goto out_nobuffer;
3314         }
3315
3316         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3317
3318         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3319                 goto out_put;
3320
3321         size = sizeof(*entry) + sizeof(u32) * len;
3322         buffer = tr->array_buffer.buffer;
3323         ring_buffer_nest_start(buffer);
3324         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3325                                             trace_ctx);
3326         if (!event)
3327                 goto out;
3328         entry = ring_buffer_event_data(event);
3329         entry->ip                       = ip;
3330         entry->fmt                      = fmt;
3331
3332         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3333         if (!call_filter_check_discard(call, entry, buffer, event)) {
3334                 __buffer_unlock_commit(buffer, event);
3335                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3336         }
3337
3338 out:
3339         ring_buffer_nest_end(buffer);
3340 out_put:
3341         put_trace_buf();
3342
3343 out_nobuffer:
3344         preempt_enable_notrace();
3345         unpause_graph_tracing();
3346
3347         return len;
3348 }
3349 EXPORT_SYMBOL_GPL(trace_vbprintk);
3350
3351 __printf(3, 0)
3352 static int
3353 __trace_array_vprintk(struct trace_buffer *buffer,
3354                       unsigned long ip, const char *fmt, va_list args)
3355 {
3356         struct trace_event_call *call = &event_print;
3357         struct ring_buffer_event *event;
3358         int len = 0, size;
3359         struct print_entry *entry;
3360         unsigned int trace_ctx;
3361         char *tbuffer;
3362
3363         if (tracing_disabled || tracing_selftest_running)
3364                 return 0;
3365
3366         /* Don't pollute graph traces with trace_vprintk internals */
3367         pause_graph_tracing();
3368
3369         trace_ctx = tracing_gen_ctx();
3370         preempt_disable_notrace();
3371
3373         tbuffer = get_trace_buf();
3374         if (!tbuffer) {
3375                 len = 0;
3376                 goto out_nobuffer;
3377         }
3378
3379         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3380
3381         size = sizeof(*entry) + len + 1;
3382         ring_buffer_nest_start(buffer);
3383         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3384                                             trace_ctx);
3385         if (!event)
3386                 goto out;
3387         entry = ring_buffer_event_data(event);
3388         entry->ip = ip;
3389
3390         memcpy(&entry->buf, tbuffer, len + 1);
3391         if (!call_filter_check_discard(call, entry, buffer, event)) {
3392                 __buffer_unlock_commit(buffer, event);
3393                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3394         }
3395
3396 out:
3397         ring_buffer_nest_end(buffer);
3398         put_trace_buf();
3399
3400 out_nobuffer:
3401         preempt_enable_notrace();
3402         unpause_graph_tracing();
3403
3404         return len;
3405 }
3406
3407 __printf(3, 0)
3408 int trace_array_vprintk(struct trace_array *tr,
3409                         unsigned long ip, const char *fmt, va_list args)
3410 {
3411         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3412 }
3413
3414 /**
3415  * trace_array_printk - Print a message to a specific instance
3416  * @tr: The instance trace_array descriptor
3417  * @ip: The instruction pointer that this is called from.
3418  * @fmt: The format to print (printf format)
3419  *
3420  * If a subsystem sets up its own instance, it may printk strings
3421  * into its tracing instance buffer using this function. Note that
3422  * this function will not write into the top level buffer (use
3423  * trace_printk() for that), as the top level buffer should only
3424  * contain events that can be individually disabled. trace_printk()
3425  * is meant only for debugging a kernel and should never be used
3426  * in normal (production) code.
3427  *
3428  * trace_array_printk() can be used, as it will not add noise to the
3429  * top level tracing buffer.
3430  *
3431  * Note, trace_array_init_printk() must be called on @tr before this
3432  * can be used.
3433  */
3434 __printf(3, 0)
3435 int trace_array_printk(struct trace_array *tr,
3436                        unsigned long ip, const char *fmt, ...)
3437 {
3438         int ret;
3439         va_list ap;
3440
3441         if (!tr)
3442                 return -ENOENT;
3443
3444         /* This is only allowed for created instances */
3445         if (tr == &global_trace)
3446                 return 0;
3447
3448         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3449                 return 0;
3450
3451         va_start(ap, fmt);
3452         ret = trace_array_vprintk(tr, ip, fmt, ap);
3453         va_end(ap);
3454         return ret;
3455 }
3456 EXPORT_SYMBOL_GPL(trace_array_printk);
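/*
 * Minimal usage sketch (hypothetical subsystem "foo"; the instance name
 * and message are illustrative only): a caller would typically create or
 * look up its own instance, initialize the percpu buffers once, print
 * into it, and drop the reference with trace_array_put() when done.
 *
 *	struct trace_array *tr = trace_array_get_by_name("foo");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "did %d things\n", n);
 */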
3457
3458 /**
3459  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3460  * @tr: The trace array to initialize the buffers for
3461  *
3462  * As trace_array_printk() only writes into instances, they are OK to
3463  * As trace_array_printk() only writes into instances, calls to it are
3464  * OK to have in the kernel (unlike trace_printk()). This needs to be
3465  * called before trace_array_printk() can be used on a trace_array.
3466 int trace_array_init_printk(struct trace_array *tr)
3467 {
3468         if (!tr)
3469                 return -ENOENT;
3470
3471         /* This is only allowed for created instances */
3472         if (tr == &global_trace)
3473                 return -EINVAL;
3474
3475         return alloc_percpu_trace_buffer();
3476 }
3477 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3478
3479 __printf(3, 4)
3480 int trace_array_printk_buf(struct trace_buffer *buffer,
3481                            unsigned long ip, const char *fmt, ...)
3482 {
3483         int ret;
3484         va_list ap;
3485
3486         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3487                 return 0;
3488
3489         va_start(ap, fmt);
3490         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3491         va_end(ap);
3492         return ret;
3493 }
3494
3495 __printf(2, 0)
3496 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3497 {
3498         return trace_array_vprintk(&global_trace, ip, fmt, args);
3499 }
3500 EXPORT_SYMBOL_GPL(trace_vprintk);
3501
3502 static void trace_iterator_increment(struct trace_iterator *iter)
3503 {
3504         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3505
3506         iter->idx++;
3507         if (buf_iter)
3508                 ring_buffer_iter_advance(buf_iter);
3509 }
3510
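/*
 * Return the entry that the given CPU's iterator (or, if there is no
 * iterator, the live ring buffer) currently points at, without consuming
 * it. The entry's timestamp is reported through @ts and, if requested,
 * dropped events are reported through @lost_events.
 */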
3511 static struct trace_entry *
3512 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3513                 unsigned long *lost_events)
3514 {
3515         struct ring_buffer_event *event;
3516         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3517
3518         if (buf_iter) {
3519                 event = ring_buffer_iter_peek(buf_iter, ts);
3520                 if (lost_events)
3521                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3522                                 (unsigned long)-1 : 0;
3523         } else {
3524                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3525                                          lost_events);
3526         }
3527
3528         if (event) {
3529                 iter->ent_size = ring_buffer_event_length(event);
3530                 return ring_buffer_event_data(event);
3531         }
3532         iter->ent_size = 0;
3533         return NULL;
3534 }
3535
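/*
 * Find the next entry to print by peeking at every tracing CPU (or only
 * the CPU selected by iter->cpu_file) and picking the entry with the
 * smallest timestamp. The winning CPU, timestamp and lost-event count
 * are reported through the optional output parameters.
 */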
3536 static struct trace_entry *
3537 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3538                   unsigned long *missing_events, u64 *ent_ts)
3539 {
3540         struct trace_buffer *buffer = iter->array_buffer->buffer;
3541         struct trace_entry *ent, *next = NULL;
3542         unsigned long lost_events = 0, next_lost = 0;
3543         int cpu_file = iter->cpu_file;
3544         u64 next_ts = 0, ts;
3545         int next_cpu = -1;
3546         int next_size = 0;
3547         int cpu;
3548
3549         /*
3550          * If we are in a per_cpu trace file, don't bother iterating over
3551          * all CPUs; peek directly at that CPU.
3552          */
3553         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3554                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3555                         return NULL;
3556                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3557                 if (ent_cpu)
3558                         *ent_cpu = cpu_file;
3559
3560                 return ent;
3561         }
3562
3563         for_each_tracing_cpu(cpu) {
3564
3565                 if (ring_buffer_empty_cpu(buffer, cpu))
3566                         continue;
3567
3568                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3569
3570                 /*
3571                  * Pick the entry with the smallest timestamp:
3572                  */
3573                 if (ent && (!next || ts < next_ts)) {
3574                         next = ent;
3575                         next_cpu = cpu;
3576                         next_ts = ts;
3577                         next_lost = lost_events;
3578                         next_size = iter->ent_size;
3579                 }
3580         }
3581
3582         iter->ent_size = next_size;
3583
3584         if (ent_cpu)
3585                 *ent_cpu = next_cpu;
3586
3587         if (ent_ts)
3588                 *ent_ts = next_ts;
3589
3590         if (missing_events)
3591                 *missing_events = next_lost;
3592
3593         return next;
3594 }
3595
3596 #define STATIC_FMT_BUF_SIZE     128
3597 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3598
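/*
 * Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes. Returns the new buffer,
 * or NULL if it cannot (or must not) be reallocated, such as when the
 * static buffer is in use for tp_printk or ftrace_dump().
 */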
3599 static char *trace_iter_expand_format(struct trace_iterator *iter)
3600 {
3601         char *tmp;
3602
3603         /*
3604          * iter->tr is NULL when used with tp_printk, which means this
3605          * can get called where it is not safe to call krealloc().
3606          */
3607         if (!iter->tr || iter->fmt == static_fmt_buf)
3608                 return NULL;
3609
3610         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3611                        GFP_KERNEL);
3612         if (tmp) {
3613                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3614                 iter->fmt = tmp;
3615         }
3616
3617         return tmp;
3618 }
3619
3620 /* Returns true if the string is safe to dereference from an event */
3621 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3622 {
3623         unsigned long addr = (unsigned long)str;
3624         struct trace_event *trace_event;
3625         struct trace_event_call *event;
3626
3627         /* OK if part of the event data */
3628         if ((addr >= (unsigned long)iter->ent) &&
3629             (addr < (unsigned long)iter->ent + iter->ent_size))
3630                 return true;
3631
3632         /* OK if part of the temp seq buffer */
3633         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3634             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3635                 return true;
3636
3637         /* Core rodata cannot be freed */
3638         if (is_kernel_rodata(addr))
3639                 return true;
3640
3641         if (trace_is_tracepoint_string(str))
3642                 return true;
3643
3644         /*
3645          * Now this could be a module event, referencing core module
3646          * data, which is OK.
3647          */
3648         if (!iter->ent)
3649                 return false;
3650
3651         trace_event = ftrace_find_event(iter->ent->type);
3652         if (!trace_event)
3653                 return false;
3654
3655         event = container_of(trace_event, struct trace_event_call, event);
3656         if (!event->mod)
3657                 return false;
3658
3659         /* Would rather have rodata, but this will suffice */
3660         if (within_module_core(addr, event->mod))
3661                 return true;
3662
3663         return false;
3664 }
3665
3666 static const char *show_buffer(struct trace_seq *s)
3667 {
3668         struct seq_buf *seq = &s->seq;
3669
3670         seq_buf_terminate(seq);
3671
3672         return seq->buffer;
3673 }
3674
3675 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3676
3677 static int test_can_verify_check(const char *fmt, ...)
3678 {
3679         char buf[16];
3680         va_list ap;
3681         int ret;
3682
3683         /*
3684          * The verifier depends on vsnprintf() modifying the va_list that
3685          * is passed to it, i.e. on the va_list being passed by reference.
3686          * Some architectures (like x86_32) pass it by value, which means
3687          * that vsnprintf() does not modify the caller's va_list, and the
3688          * verifier would then need to be able to understand all the values
3689          * that vsnprintf can consume. If the va_list is passed by value,
3690          * the verifier is disabled.
3691          */
3692         va_start(ap, fmt);
3693         vsnprintf(buf, 16, "%d", ap);
3694         ret = va_arg(ap, int);
3695         va_end(ap);
3696
3697         return ret;
3698 }
3699
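/*
 * Worked example of the check above: test_can_verify_check("%d %d", 0, 1)
 * formats only "%d", which consumes the first variadic argument (0). If
 * the architecture passes the va_list by reference, the following
 * va_arg() reads the second argument and the function returns 1, keeping
 * the verifier enabled. If the va_list is passed by value, va_arg()
 * re-reads the first argument and the function returns 0, so
 * test_can_verify() disables the verifier.
 */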
3700 static void test_can_verify(void)
3701 {
3702         if (!test_can_verify_check("%d %d", 0, 1)) {
3703                 pr_info("trace event string verifier disabled\n");
3704                 static_branch_inc(&trace_no_verify);
3705         }
3706 }
3707
3708 /**
3709  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3710  * @iter: The iterator that holds the seq buffer and the event being printed
3711  * @fmt: The format used to print the event
3712  * @ap: The va_list holding the data to print from @fmt.
3713  *
3714  * This writes the data into the @iter->seq buffer using the data from
3715  * @fmt and @ap. If the format has a %s, then the source of the string
3716  * is examined to make sure it is safe to print; if it is not, this
3717  * warns and prints "[UNSAFE-MEMORY]" in place of the dereferenced
3718  * string pointer.
3719  */
3720 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3721                          va_list ap)
3722 {
3723         const char *p = fmt;
3724         const char *str;
3725         int i, j;
3726
3727         if (WARN_ON_ONCE(!fmt))
3728                 return;
3729
3730         if (static_branch_unlikely(&trace_no_verify))
3731                 goto print;
3732
3733         /* Don't bother checking when doing a ftrace_dump() */
3734         if (iter->fmt == static_fmt_buf)
3735                 goto print;
3736
3737         while (*p) {
3738                 bool star = false;
3739                 int len = 0;
3740
3741                 j = 0;
3742
3743                 /* We only care about %s and variants */
3744                 for (i = 0; p[i]; i++) {
3745                         if (i + 1 >= iter->fmt_size) {
3746                                 /*
3747                                  * If we can't expand the copy buffer,
3748                                  * just print it.
3749                                  */
3750                                 if (!trace_iter_expand_format(iter))
3751                                         goto print;
3752                         }
3753
3754                         if (p[i] == '\\' && p[i+1]) {
3755                                 i++;
3756                                 continue;
3757                         }
3758                         if (p[i] == '%') {
3759                                 /* Need to test cases like %08.*s */
3760                                 for (j = 1; p[i+j]; j++) {
3761                                         if (isdigit(p[i+j]) ||
3762                                             p[i+j] == '.')
3763                                                 continue;
3764                                         if (p[i+j] == '*') {
3765                                                 star = true;
3766                                                 continue;
3767                                         }
3768                                         break;
3769                                 }
3770                                 if (p[i+j] == 's')
3771                                         break;
3772                                 star = false;
3773                         }
3774                         j = 0;
3775                 }
3776                 /* If no %s found then just print normally */
3777                 if (!p[i])
3778                         break;
3779
3780                 /* Copy up to the %s, and print that */
3781                 strncpy(iter->fmt, p, i);
3782                 iter->fmt[i] = '\0';
3783                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3784
3785                 if (star)
3786                         len = va_arg(ap, int);
3787
3788                 /* The ap now points to the string data of the %s */
3789                 str = va_arg(ap, const char *);
3790
3791                 /*
3792                  * If you hit this warning, it is likely that the
3793                  * trace event in question used %s on a string that
3794                  * was saved at the time of the event, but may not be
3795                  * around when the trace is read. Use __string(),
3796                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3797                  * instead. See samples/trace_events/trace-events-sample.h
3798                  * for reference.
3799                  */
3800                 if (WARN_ONCE(!trace_safe_str(iter, str),
3801                               "fmt: '%s' current_buffer: '%s'",
3802                               fmt, show_buffer(&iter->seq))) {
3803                         int ret;
3804
3805                         /* Try to safely read the string */
3806                         if (star) {
3807                                 if (len + 1 > iter->fmt_size)
3808                                         len = iter->fmt_size - 1;
3809                                 if (len < 0)
3810                                         len = 0;
3811                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3812                                 iter->fmt[len] = 0;
3813                                 star = false;
3814                         } else {
3815                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3816                                                                   iter->fmt_size);
3817                         }
3818                         if (ret < 0)
3819                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3820                         else
3821                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3822                                                  str, iter->fmt);
3823                         str = "[UNSAFE-MEMORY]";
3824                         strcpy(iter->fmt, "%s");
3825                 } else {
3826                         strncpy(iter->fmt, p + i, j + 1);
3827                         iter->fmt[j+1] = '\0';
3828                 }
3829                 if (star)
3830                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3831                 else
3832                         trace_seq_printf(&iter->seq, iter->fmt, str);
3833
3834                 p += i + j + 1;
3835         }
3836  print:
3837         if (*p)
3838                 trace_seq_vprintf(&iter->seq, p, ap);
3839 }
3840
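/*
 * When pointer hashing is disabled for this instance (TRACE_ITER_HASH_PTR
 * cleared), rewrite the event's format string so that bare "%p"
 * conversions become "%px" and print the real address. Extended
 * specifiers such as "%pS" or "%ps" are left alone because the character
 * following the 'p' is alphanumeric. For example, "ip=%p func=%pS"
 * becomes "ip=%px func=%pS".
 */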
3841 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3842 {
3843         const char *p, *new_fmt;
3844         char *q;
3845
3846         if (WARN_ON_ONCE(!fmt))
3847                 return fmt;
3848
3849         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3850                 return fmt;
3851
3852         p = fmt;
3853         new_fmt = q = iter->fmt;
3854         while (*p) {
3855                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3856                         if (!trace_iter_expand_format(iter))
3857                                 return fmt;
3858
3859                         q += iter->fmt - new_fmt;
3860                         new_fmt = iter->fmt;
3861                 }
3862
3863                 *q++ = *p++;
3864
3865                 /* Replace %p with %px */
3866                 if (p[-1] == '%') {
3867                         if (p[0] == '%') {
3868                                 *q++ = *p++;
3869                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3870                                 *q++ = *p++;
3871                                 *q++ = 'x';
3872                         }
3873                 }
3874         }
3875         *q = '\0';
3876
3877         return new_fmt;
3878 }
3879
3880 #define STATIC_TEMP_BUF_SIZE    128
3881 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3882
3883 /* Find the next real entry, without updating the iterator itself */
3884 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3885                                           int *ent_cpu, u64 *ent_ts)
3886 {
3887         /* __find_next_entry will reset ent_size */
3888         int ent_size = iter->ent_size;
3889         struct trace_entry *entry;
3890
3891         /*
3892          * If called from ftrace_dump(), then the iter->temp buffer
3893          * will be the static_temp_buf and not created from kmalloc.
3894          * If the entry size is greater than the buffer, we cannot
3895          * save it. Just return NULL in that case. This is only
3896          * used to add markers when two consecutive events' time
3897          * stamps have a large delta. See trace_print_lat_context().
3898          */
3899         if (iter->temp == static_temp_buf &&
3900             STATIC_TEMP_BUF_SIZE < ent_size)
3901                 return NULL;
3902
3903         /*
3904          * __find_next_entry() may call peek_next_entry(), which may in
3905          * turn call ring_buffer_peek(), leaving the contents of iter->ent
3906          * undefined. Copy iter->ent now.
3907          */
3908         if (iter->ent && iter->ent != iter->temp) {
3909                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3910                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3911                         void *temp;
3912                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3913                         if (!temp)
3914                                 return NULL;
3915                         kfree(iter->temp);
3916                         iter->temp = temp;
3917                         iter->temp_size = iter->ent_size;
3918                 }
3919                 memcpy(iter->temp, iter->ent, iter->ent_size);
3920                 iter->ent = iter->temp;
3921         }
3922         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3923         /* Put back the original ent_size */
3924         iter->ent_size = ent_size;
3925
3926         return entry;
3927 }
3928
3929 /* Find the next real entry, and increment the iterator to the next entry */
3930 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3931 {
3932         iter->ent = __find_next_entry(iter, &iter->cpu,
3933                                       &iter->lost_events, &iter->ts);
3934
3935         if (iter->ent)
3936                 trace_iterator_increment(iter);
3937
3938         return iter->ent ? iter : NULL;
3939 }
3940
3941 static void trace_consume(struct trace_iterator *iter)
3942 {
3943         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3944                             &iter->lost_events);
3945 }
3946
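/*
 * seq_file callbacks for reading the "trace" file: s_start() positions
 * the iterator for the requested offset (taking the needed locks),
 * s_next() advances it one entry at a time, s_show() formats a single
 * entry (or the headers) into the seq_file, and s_stop() drops the locks
 * again.
 */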
3947 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3948 {
3949         struct trace_iterator *iter = m->private;
3950         int i = (int)*pos;
3951         void *ent;
3952
3953         WARN_ON_ONCE(iter->leftover);
3954
3955         (*pos)++;
3956
3957         /* can't go backwards */
3958         if (iter->idx > i)
3959                 return NULL;
3960
3961         if (iter->idx < 0)
3962                 ent = trace_find_next_entry_inc(iter);
3963         else
3964                 ent = iter;
3965
3966         while (ent && iter->idx < i)
3967                 ent = trace_find_next_entry_inc(iter);
3968
3969         iter->pos = *pos;
3970
3971         return ent;
3972 }
3973
3974 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3975 {
3976         struct ring_buffer_iter *buf_iter;
3977         unsigned long entries = 0;
3978         u64 ts;
3979
3980         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3981
3982         buf_iter = trace_buffer_iter(iter, cpu);
3983         if (!buf_iter)
3984                 return;
3985
3986         ring_buffer_iter_reset(buf_iter);
3987
3988         /*
3989          * With the max latency tracers, it is possible that a reset
3990          * never took place on a CPU. This is evident when the timestamp
3991          * is before the start of the buffer.
3992          */
3993         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3994                 if (ts >= iter->array_buffer->time_start)
3995                         break;
3996                 entries++;
3997                 ring_buffer_iter_advance(buf_iter);
3998         }
3999
4000         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4001 }
4002
4003 /*
4004  * The current tracer is copied to avoid taking a global lock
4005  * all around.
4006  */
4007 static void *s_start(struct seq_file *m, loff_t *pos)
4008 {
4009         struct trace_iterator *iter = m->private;
4010         struct trace_array *tr = iter->tr;
4011         int cpu_file = iter->cpu_file;
4012         void *p = NULL;
4013         loff_t l = 0;
4014         int cpu;
4015
4016         /*
4017          * Copy the tracer to avoid using a global lock all around.
4018          * iter->trace is a copy of current_trace, so the name pointer
4019          * may be compared instead of using strcmp(), as iter->trace->name
4020          * will point to the same string as current_trace->name.
4021          */
4022         mutex_lock(&trace_types_lock);
4023         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4024                 *iter->trace = *tr->current_trace;
4025         mutex_unlock(&trace_types_lock);
4026
4027 #ifdef CONFIG_TRACER_MAX_TRACE
4028         if (iter->snapshot && iter->trace->use_max_tr)
4029                 return ERR_PTR(-EBUSY);
4030 #endif
4031
4032         if (*pos != iter->pos) {
4033                 iter->ent = NULL;
4034                 iter->cpu = 0;
4035                 iter->idx = -1;
4036
4037                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4038                         for_each_tracing_cpu(cpu)
4039                                 tracing_iter_reset(iter, cpu);
4040                 } else
4041                         tracing_iter_reset(iter, cpu_file);
4042
4043                 iter->leftover = 0;
4044                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4045                         ;
4046
4047         } else {
4048                 /*
4049                  * If we overflowed the seq_file before, then we want
4050                  * to just reuse the trace_seq buffer again.
4051                  */
4052                 if (iter->leftover)
4053                         p = iter;
4054                 else {
4055                         l = *pos - 1;
4056                         p = s_next(m, p, &l);
4057                 }
4058         }
4059
4060         trace_event_read_lock();
4061         trace_access_lock(cpu_file);
4062         return p;
4063 }
4064
4065 static void s_stop(struct seq_file *m, void *p)
4066 {
4067         struct trace_iterator *iter = m->private;
4068
4069 #ifdef CONFIG_TRACER_MAX_TRACE
4070         if (iter->snapshot && iter->trace->use_max_tr)
4071                 return;
4072 #endif
4073
4074         trace_access_unlock(iter->cpu_file);
4075         trace_event_read_unlock();
4076 }
4077
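/*
 * For one CPU, @entries is the number of entries currently in the buffer
 * (minus any skipped by a latency-tracer reset), while @total also
 * includes the entries that were lost to ring buffer overruns.
 */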
4078 static void
4079 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4080                       unsigned long *entries, int cpu)
4081 {
4082         unsigned long count;
4083
4084         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4085         /*
4086          * If this buffer has skipped entries, then we hold all
4087          * entries for the trace and we need to ignore the
4088          * ones before the time stamp.
4089          */
4090         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4091                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4092                 /* total is the same as the entries */
4093                 *total = count;
4094         } else
4095                 *total = count +
4096                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4097         *entries = count;
4098 }
4099
4100 static void
4101 get_total_entries(struct array_buffer *buf,
4102                   unsigned long *total, unsigned long *entries)
4103 {
4104         unsigned long t, e;
4105         int cpu;
4106
4107         *total = 0;
4108         *entries = 0;
4109
4110         for_each_tracing_cpu(cpu) {
4111                 get_total_entries_cpu(buf, &t, &e, cpu);
4112                 *total += t;
4113                 *entries += e;
4114         }
4115 }
4116
4117 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4118 {
4119         unsigned long total, entries;
4120
4121         if (!tr)
4122                 tr = &global_trace;
4123
4124         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4125
4126         return entries;
4127 }
4128
4129 unsigned long trace_total_entries(struct trace_array *tr)
4130 {
4131         unsigned long total, entries;
4132
4133         if (!tr)
4134                 tr = &global_trace;
4135
4136         get_total_entries(&tr->array_buffer, &total, &entries);
4137
4138         return entries;
4139 }
4140
4141 static void print_lat_help_header(struct seq_file *m)
4142 {
4143         seq_puts(m, "#                    _------=> CPU#            \n"
4144                     "#                   / _-----=> irqs-off        \n"
4145                     "#                  | / _----=> need-resched    \n"
4146                     "#                  || / _---=> hardirq/softirq \n"
4147                     "#                  ||| / _--=> preempt-depth   \n"
4148                     "#                  |||| /     delay            \n"
4149                     "#  cmd     pid     ||||| time  |   caller      \n"
4150                     "#     \\   /        |||||  \\    |   /         \n");
4151 }
4152
4153 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4154 {
4155         unsigned long total;
4156         unsigned long entries;
4157
4158         get_total_entries(buf, &total, &entries);
4159         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4160                    entries, total, num_online_cpus());
4161         seq_puts(m, "#\n");
4162 }
4163
4164 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4165                                    unsigned int flags)
4166 {
4167         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4168
4169         print_event_info(buf, m);
4170
4171         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4172         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4173 }
4174
4175 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4176                                        unsigned int flags)
4177 {
4178         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4179         const char *space = "            ";
4180         int prec = tgid ? 12 : 2;
4181
4182         print_event_info(buf, m);
4183
4184         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4185         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4186         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4187         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4188         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4189         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4190         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4191 }
4192
4193 void
4194 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4195 {
4196         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4197         struct array_buffer *buf = iter->array_buffer;
4198         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4199         struct tracer *type = iter->trace;
4200         unsigned long entries;
4201         unsigned long total;
4202         const char *name = type->name;
4205
4206         get_total_entries(buf, &total, &entries);
4207
4208         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4209                    name, UTS_RELEASE);
4210         seq_puts(m, "# -----------------------------------"
4211                  "---------------------------------\n");
4212         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4213                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4214                    nsecs_to_usecs(data->saved_latency),
4215                    entries,
4216                    total,
4217                    buf->cpu,
4218 #if defined(CONFIG_PREEMPT_NONE)
4219                    "server",
4220 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4221                    "desktop",
4222 #elif defined(CONFIG_PREEMPT)
4223                    "preempt",
4224 #elif defined(CONFIG_PREEMPT_RT)
4225                    "preempt_rt",
4226 #else
4227                    "unknown",
4228 #endif
4229                    /* These are reserved for later use */
4230                    0, 0, 0, 0);
4231 #ifdef CONFIG_SMP
4232         seq_printf(m, " #P:%d)\n", num_online_cpus());
4233 #else
4234         seq_puts(m, ")\n");
4235 #endif
4236         seq_puts(m, "#    -----------------\n");
4237         seq_printf(m, "#    | task: %.16s-%d "
4238                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4239                    data->comm, data->pid,
4240                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4241                    data->policy, data->rt_priority);
4242         seq_puts(m, "#    -----------------\n");
4243
4244         if (data->critical_start) {
4245                 seq_puts(m, "#  => started at: ");
4246                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4247                 trace_print_seq(m, &iter->seq);
4248                 seq_puts(m, "\n#  => ended at:   ");
4249                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4250                 trace_print_seq(m, &iter->seq);
4251                 seq_puts(m, "\n#\n");
4252         }
4253
4254         seq_puts(m, "#\n");
4255 }
4256
4257 static void test_cpu_buff_start(struct trace_iterator *iter)
4258 {
4259         struct trace_seq *s = &iter->seq;
4260         struct trace_array *tr = iter->tr;
4261
4262         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4263                 return;
4264
4265         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4266                 return;
4267
4268         if (cpumask_available(iter->started) &&
4269             cpumask_test_cpu(iter->cpu, iter->started))
4270                 return;
4271
4272         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4273                 return;
4274
4275         if (cpumask_available(iter->started))
4276                 cpumask_set_cpu(iter->cpu, iter->started);
4277
4278         /* Don't print started cpu buffer for the first entry of the trace */
4279         if (iter->idx > 1)
4280                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4281                                 iter->cpu);
4282 }
4283
4284 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4285 {
4286         struct trace_array *tr = iter->tr;
4287         struct trace_seq *s = &iter->seq;
4288         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4289         struct trace_entry *entry;
4290         struct trace_event *event;
4291
4292         entry = iter->ent;
4293
4294         test_cpu_buff_start(iter);
4295
4296         event = ftrace_find_event(entry->type);
4297
4298         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4299                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4300                         trace_print_lat_context(iter);
4301                 else
4302                         trace_print_context(iter);
4303         }
4304
4305         if (trace_seq_has_overflowed(s))
4306                 return TRACE_TYPE_PARTIAL_LINE;
4307
4308         if (event)
4309                 return event->funcs->trace(iter, sym_flags, event);
4310
4311         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4312
4313         return trace_handle_return(s);
4314 }
4315
4316 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4317 {
4318         struct trace_array *tr = iter->tr;
4319         struct trace_seq *s = &iter->seq;
4320         struct trace_entry *entry;
4321         struct trace_event *event;
4322
4323         entry = iter->ent;
4324
4325         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4326                 trace_seq_printf(s, "%d %d %llu ",
4327                                  entry->pid, iter->cpu, iter->ts);
4328
4329         if (trace_seq_has_overflowed(s))
4330                 return TRACE_TYPE_PARTIAL_LINE;
4331
4332         event = ftrace_find_event(entry->type);
4333         if (event)
4334                 return event->funcs->raw(iter, 0, event);
4335
4336         trace_seq_printf(s, "%d ?\n", entry->type);
4337
4338         return trace_handle_return(s);
4339 }
4340
4341 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4342 {
4343         struct trace_array *tr = iter->tr;
4344         struct trace_seq *s = &iter->seq;
4345         unsigned char newline = '\n';
4346         struct trace_entry *entry;
4347         struct trace_event *event;
4348
4349         entry = iter->ent;
4350
4351         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4352                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4353                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4354                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4355                 if (trace_seq_has_overflowed(s))
4356                         return TRACE_TYPE_PARTIAL_LINE;
4357         }
4358
4359         event = ftrace_find_event(entry->type);
4360         if (event) {
4361                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4362                 if (ret != TRACE_TYPE_HANDLED)
4363                         return ret;
4364         }
4365
4366         SEQ_PUT_FIELD(s, newline);
4367
4368         return trace_handle_return(s);
4369 }
4370
4371 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4372 {
4373         struct trace_array *tr = iter->tr;
4374         struct trace_seq *s = &iter->seq;
4375         struct trace_entry *entry;
4376         struct trace_event *event;
4377
4378         entry = iter->ent;
4379
4380         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4381                 SEQ_PUT_FIELD(s, entry->pid);
4382                 SEQ_PUT_FIELD(s, iter->cpu);
4383                 SEQ_PUT_FIELD(s, iter->ts);
4384                 if (trace_seq_has_overflowed(s))
4385                         return TRACE_TYPE_PARTIAL_LINE;
4386         }
4387
4388         event = ftrace_find_event(entry->type);
4389         return event ? event->funcs->binary(iter, 0, event) :
4390                 TRACE_TYPE_HANDLED;
4391 }
4392
4393 int trace_empty(struct trace_iterator *iter)
4394 {
4395         struct ring_buffer_iter *buf_iter;
4396         int cpu;
4397
4398         /* If we are looking at one CPU buffer, only check that one */
4399         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4400                 cpu = iter->cpu_file;
4401                 buf_iter = trace_buffer_iter(iter, cpu);
4402                 if (buf_iter) {
4403                         if (!ring_buffer_iter_empty(buf_iter))
4404                                 return 0;
4405                 } else {
4406                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4407                                 return 0;
4408                 }
4409                 return 1;
4410         }
4411
4412         for_each_tracing_cpu(cpu) {
4413                 buf_iter = trace_buffer_iter(iter, cpu);
4414                 if (buf_iter) {
4415                         if (!ring_buffer_iter_empty(buf_iter))
4416                                 return 0;
4417                 } else {
4418                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4419                                 return 0;
4420                 }
4421         }
4422
4423         return 1;
4424 }
4425
4426 /*  Called with trace_event_read_lock() held. */
4427 enum print_line_t print_trace_line(struct trace_iterator *iter)
4428 {
4429         struct trace_array *tr = iter->tr;
4430         unsigned long trace_flags = tr->trace_flags;
4431         enum print_line_t ret;
4432
4433         if (iter->lost_events) {
4434                 if (iter->lost_events == (unsigned long)-1)
4435                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4436                                          iter->cpu);
4437                 else
4438                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4439                                          iter->cpu, iter->lost_events);
4440                 if (trace_seq_has_overflowed(&iter->seq))
4441                         return TRACE_TYPE_PARTIAL_LINE;
4442         }
4443
4444         if (iter->trace && iter->trace->print_line) {
4445                 ret = iter->trace->print_line(iter);
4446                 if (ret != TRACE_TYPE_UNHANDLED)
4447                         return ret;
4448         }
4449
4450         if (iter->ent->type == TRACE_BPUTS &&
4451                         trace_flags & TRACE_ITER_PRINTK &&
4452                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4453                 return trace_print_bputs_msg_only(iter);
4454
4455         if (iter->ent->type == TRACE_BPRINT &&
4456                         trace_flags & TRACE_ITER_PRINTK &&
4457                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4458                 return trace_print_bprintk_msg_only(iter);
4459
4460         if (iter->ent->type == TRACE_PRINT &&
4461                         trace_flags & TRACE_ITER_PRINTK &&
4462                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4463                 return trace_print_printk_msg_only(iter);
4464
4465         if (trace_flags & TRACE_ITER_BIN)
4466                 return print_bin_fmt(iter);
4467
4468         if (trace_flags & TRACE_ITER_HEX)
4469                 return print_hex_fmt(iter);
4470
4471         if (trace_flags & TRACE_ITER_RAW)
4472                 return print_raw_fmt(iter);
4473
4474         return print_trace_fmt(iter);
4475 }
4476
4477 void trace_latency_header(struct seq_file *m)
4478 {
4479         struct trace_iterator *iter = m->private;
4480         struct trace_array *tr = iter->tr;
4481
4482         /* print nothing if the buffers are empty */
4483         if (trace_empty(iter))
4484                 return;
4485
4486         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4487                 print_trace_header(m, iter);
4488
4489         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4490                 print_lat_help_header(m);
4491 }
4492
4493 void trace_default_header(struct seq_file *m)
4494 {
4495         struct trace_iterator *iter = m->private;
4496         struct trace_array *tr = iter->tr;
4497         unsigned long trace_flags = tr->trace_flags;
4498
4499         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4500                 return;
4501
4502         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4503                 /* print nothing if the buffers are empty */
4504                 if (trace_empty(iter))
4505                         return;
4506                 print_trace_header(m, iter);
4507                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4508                         print_lat_help_header(m);
4509         } else {
4510                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4511                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4512                                 print_func_help_header_irq(iter->array_buffer,
4513                                                            m, trace_flags);
4514                         else
4515                                 print_func_help_header(iter->array_buffer, m,
4516                                                        trace_flags);
4517                 }
4518         }
4519 }
4520
4521 static void test_ftrace_alive(struct seq_file *m)
4522 {
4523         if (!ftrace_is_dead())
4524                 return;
4525         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4526                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4527 }
4528
4529 #ifdef CONFIG_TRACER_MAX_TRACE
4530 static void show_snapshot_main_help(struct seq_file *m)
4531 {
4532         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4533                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4534                     "#                      Takes a snapshot of the main buffer.\n"
4535                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4536                     "#                      (Doesn't have to be '2'; works with any number that\n"
4537                     "#                       is not a '0' or '1')\n");
4538 }
4539
4540 static void show_snapshot_percpu_help(struct seq_file *m)
4541 {
4542         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4543 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4544         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4545                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4546 #else
4547         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4548                     "#                     Must use main snapshot file to allocate.\n");
4549 #endif
4550         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4551                     "#                      (Doesn't have to be '2'; works with any number that\n"
4552                     "#                       is not a '0' or '1')\n");
4553 }
4554
4555 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4556 {
4557         if (iter->tr->allocated_snapshot)
4558                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4559         else
4560                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4561
4562         seq_puts(m, "# Snapshot commands:\n");
4563         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4564                 show_snapshot_main_help(m);
4565         else
4566                 show_snapshot_percpu_help(m);
4567 }
4568 #else
4569 /* Should never be called */
4570 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4571 #endif
4572
4573 static int s_show(struct seq_file *m, void *v)
4574 {
4575         struct trace_iterator *iter = v;
4576         int ret;
4577
4578         if (iter->ent == NULL) {
4579                 if (iter->tr) {
4580                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4581                         seq_puts(m, "#\n");
4582                         test_ftrace_alive(m);
4583                 }
4584                 if (iter->snapshot && trace_empty(iter))
4585                         print_snapshot_help(m, iter);
4586                 else if (iter->trace && iter->trace->print_header)
4587                         iter->trace->print_header(m);
4588                 else
4589                         trace_default_header(m);
4590
4591         } else if (iter->leftover) {
4592                 /*
4593                  * If we filled the seq_file buffer earlier, we
4594                  * want to just show it now.
4595                  */
4596                 ret = trace_print_seq(m, &iter->seq);
4597
4598                 /* ret should this time be zero, but you never know */
4599                 iter->leftover = ret;
4600
4601         } else {
4602                 print_trace_line(iter);
4603                 ret = trace_print_seq(m, &iter->seq);
4604                 /*
4605                  * If we overflow the seq_file buffer, then it will
4606                  * ask us for this data again at start up.
4607                  * Use that instead.
4608                  *  ret is 0 if seq_file write succeeded.
4609                  *        -1 otherwise.
4610                  */
4611                 iter->leftover = ret;
4612         }
4613
4614         return 0;
4615 }
4616
4617 /*
4618  * Should be used after trace_array_get(); trace_types_lock
4619  * ensures that i_cdev was already initialized.
4620  */
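/*
 * The encoding mirrors trace_create_cpu_file(): i_cdev holds cpu + 1,
 * so an inode without i_cdev (i.e. 0) maps to RING_BUFFER_ALL_CPUS,
 * while, for example, i_cdev == 3 selects CPU 2.
 */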
4621 static inline int tracing_get_cpu(struct inode *inode)
4622 {
4623         if (inode->i_cdev) /* See trace_create_cpu_file() */
4624                 return (long)inode->i_cdev - 1;
4625         return RING_BUFFER_ALL_CPUS;
4626 }
4627
4628 static const struct seq_operations tracer_seq_ops = {
4629         .start          = s_start,
4630         .next           = s_next,
4631         .stop           = s_stop,
4632         .show           = s_show,
4633 };
4634
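/*
 * Set up a trace_iterator for reading the "trace" (or "snapshot") file:
 * allocate the iterator and its per-CPU ring buffer iterators, copy the
 * current tracer, optionally stop tracing while the file is open, and
 * position every CPU's iterator at the start of its buffer.
 */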
4635 static struct trace_iterator *
4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4637 {
4638         struct trace_array *tr = inode->i_private;
4639         struct trace_iterator *iter;
4640         int cpu;
4641
4642         if (tracing_disabled)
4643                 return ERR_PTR(-ENODEV);
4644
4645         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4646         if (!iter)
4647                 return ERR_PTR(-ENOMEM);
4648
4649         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4650                                     GFP_KERNEL);
4651         if (!iter->buffer_iter)
4652                 goto release;
4653
4654         /*
4655          * trace_find_next_entry() may need to save off iter->ent.
4656          * It will place it into the iter->temp buffer. As most
4657          * events are less than 128 bytes, allocate a buffer of that size.
4658          * If one is greater, then trace_find_next_entry() will
4659          * allocate a new buffer to adjust for the bigger iter->ent.
4660          * It's not critical if this allocation fails here.
4661          */
4662         iter->temp = kmalloc(128, GFP_KERNEL);
4663         if (iter->temp)
4664                 iter->temp_size = 128;
4665
4666         /*
4667          * trace_event_printf() may need to modify the given format
4668          * string to replace %p with %px so that it shows the real
4669          * address instead of a hash value. However, that is only needed
4670          * for event tracing; other tracers may not need it. Defer the
4671          * allocation until it is needed.
4672          */
4673         iter->fmt = NULL;
4674         iter->fmt_size = 0;
4675
4676         /*
4677          * We make a copy of the current tracer to avoid concurrent
4678          * changes on it while we are reading.
4679          */
4680         mutex_lock(&trace_types_lock);
4681         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4682         if (!iter->trace)
4683                 goto fail;
4684
4685         *iter->trace = *tr->current_trace;
4686
4687         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4688                 goto fail;
4689
4690         iter->tr = tr;
4691
4692 #ifdef CONFIG_TRACER_MAX_TRACE
4693         /* Currently only the top directory has a snapshot */
4694         if (tr->current_trace->print_max || snapshot)
4695                 iter->array_buffer = &tr->max_buffer;
4696         else
4697 #endif
4698                 iter->array_buffer = &tr->array_buffer;
4699         iter->snapshot = snapshot;
4700         iter->pos = -1;
4701         iter->cpu_file = tracing_get_cpu(inode);
4702         mutex_init(&iter->mutex);
4703
4704         /* Notify the tracer early; before we stop tracing. */
4705         if (iter->trace->open)
4706                 iter->trace->open(iter);
4707
4708         /* Annotate start of buffers if we had overruns */
4709         if (ring_buffer_overruns(iter->array_buffer->buffer))
4710                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4711
4712         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4713         if (trace_clocks[tr->clock_id].in_ns)
4714                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4715
4716         /*
4717          * If pause-on-trace is enabled, then stop the trace while
4718          * dumping, unless this is the "snapshot" file.
4719          */
4720         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4721                 tracing_stop_tr(tr);
4722
4723         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4724                 for_each_tracing_cpu(cpu) {
4725                         iter->buffer_iter[cpu] =
4726                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4727                                                          cpu, GFP_KERNEL);
4728                 }
4729                 ring_buffer_read_prepare_sync();
4730                 for_each_tracing_cpu(cpu) {
4731                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4732                         tracing_iter_reset(iter, cpu);
4733                 }
4734         } else {
4735                 cpu = iter->cpu_file;
4736                 iter->buffer_iter[cpu] =
4737                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4738                                                  cpu, GFP_KERNEL);
4739                 ring_buffer_read_prepare_sync();
4740                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4741                 tracing_iter_reset(iter, cpu);
4742         }
4743
4744         mutex_unlock(&trace_types_lock);
4745
4746         return iter;
4747
4748  fail:
4749         mutex_unlock(&trace_types_lock);
4750         kfree(iter->trace);
4751         kfree(iter->temp);
4752         kfree(iter->buffer_iter);
4753 release:
4754         seq_release_private(inode, file);
4755         return ERR_PTR(-ENOMEM);
4756 }
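/*
 * Example: the iterator built above backs the "trace" file, so a read such
 * as (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	# cat /sys/kernel/tracing/trace
 *
 * ends up here via tracing_open() and walks the ring buffer through the
 * tracer_seq_ops set up in __tracing_open().
 */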
4757
4758 int tracing_open_generic(struct inode *inode, struct file *filp)
4759 {
4760         int ret;
4761
4762         ret = tracing_check_open_get_tr(NULL);
4763         if (ret)
4764                 return ret;
4765
4766         filp->private_data = inode->i_private;
4767         return 0;
4768 }
4769
4770 bool tracing_is_disabled(void)
4771 {
4772         return (tracing_disabled) ? true : false;
4773 }
4774
4775 /*
4776  * Open and update trace_array ref count.
4777  * Must have the current trace_array passed to it.
4778  */
4779 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4780 {
4781         struct trace_array *tr = inode->i_private;
4782         int ret;
4783
4784         ret = tracing_check_open_get_tr(tr);
4785         if (ret)
4786                 return ret;
4787
4788         filp->private_data = inode->i_private;
4789
4790         return 0;
4791 }
4792
4793 static int tracing_release(struct inode *inode, struct file *file)
4794 {
4795         struct trace_array *tr = inode->i_private;
4796         struct seq_file *m = file->private_data;
4797         struct trace_iterator *iter;
4798         int cpu;
4799
4800         if (!(file->f_mode & FMODE_READ)) {
4801                 trace_array_put(tr);
4802                 return 0;
4803         }
4804
4805         /* Writes do not use seq_file */
4806         iter = m->private;
4807         mutex_lock(&trace_types_lock);
4808
4809         for_each_tracing_cpu(cpu) {
4810                 if (iter->buffer_iter[cpu])
4811                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4812         }
4813
4814         if (iter->trace && iter->trace->close)
4815                 iter->trace->close(iter);
4816
4817         if (!iter->snapshot && tr->stop_count)
4818                 /* reenable tracing if it was previously enabled */
4819                 tracing_start_tr(tr);
4820
4821         __trace_array_put(tr);
4822
4823         mutex_unlock(&trace_types_lock);
4824
4825         mutex_destroy(&iter->mutex);
4826         free_cpumask_var(iter->started);
4827         kfree(iter->fmt);
4828         kfree(iter->temp);
4829         kfree(iter->trace);
4830         kfree(iter->buffer_iter);
4831         seq_release_private(inode, file);
4832
4833         return 0;
4834 }
4835
4836 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4837 {
4838         struct trace_array *tr = inode->i_private;
4839
4840         trace_array_put(tr);
4841         return 0;
4842 }
4843
4844 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4845 {
4846         struct trace_array *tr = inode->i_private;
4847
4848         trace_array_put(tr);
4849
4850         return single_release(inode, file);
4851 }
4852
4853 static int tracing_open(struct inode *inode, struct file *file)
4854 {
4855         struct trace_array *tr = inode->i_private;
4856         struct trace_iterator *iter;
4857         int ret;
4858
4859         ret = tracing_check_open_get_tr(tr);
4860         if (ret)
4861                 return ret;
4862
4863         /* If this file was opened for write, then erase the contents */
4864         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4865                 int cpu = tracing_get_cpu(inode);
4866                 struct array_buffer *trace_buf = &tr->array_buffer;
4867
4868 #ifdef CONFIG_TRACER_MAX_TRACE
4869                 if (tr->current_trace->print_max)
4870                         trace_buf = &tr->max_buffer;
4871 #endif
4872
4873                 if (cpu == RING_BUFFER_ALL_CPUS)
4874                         tracing_reset_online_cpus(trace_buf);
4875                 else
4876                         tracing_reset_cpu(trace_buf, cpu);
4877         }
4878
4879         if (file->f_mode & FMODE_READ) {
4880                 iter = __tracing_open(inode, file, false);
4881                 if (IS_ERR(iter))
4882                         ret = PTR_ERR(iter);
4883                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4884                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4885         }
4886
4887         if (ret < 0)
4888                 trace_array_put(tr);
4889
4890         return ret;
4891 }
4892
4893 /*
4894  * Some tracers are not suitable for instance buffers.
4895  * A tracer is always available for the global (top level) array;
4896  * it is available for an instance only if it explicitly allows it.
4897  */
4898 static bool
4899 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4900 {
4901         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4902 }
4903
4904 /* Find the next tracer that this trace array may use */
4905 static struct tracer *
4906 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4907 {
4908         while (t && !trace_ok_for_array(t, tr))
4909                 t = t->next;
4910
4911         return t;
4912 }
4913
4914 static void *
4915 t_next(struct seq_file *m, void *v, loff_t *pos)
4916 {
4917         struct trace_array *tr = m->private;
4918         struct tracer *t = v;
4919
4920         (*pos)++;
4921
4922         if (t)
4923                 t = get_tracer_for_array(tr, t->next);
4924
4925         return t;
4926 }
4927
4928 static void *t_start(struct seq_file *m, loff_t *pos)
4929 {
4930         struct trace_array *tr = m->private;
4931         struct tracer *t;
4932         loff_t l = 0;
4933
4934         mutex_lock(&trace_types_lock);
4935
4936         t = get_tracer_for_array(tr, trace_types);
4937         for (; t && l < *pos; t = t_next(m, t, &l))
4938                         ;
4939
4940         return t;
4941 }
4942
4943 static void t_stop(struct seq_file *m, void *p)
4944 {
4945         mutex_unlock(&trace_types_lock);
4946 }
4947
4948 static int t_show(struct seq_file *m, void *v)
4949 {
4950         struct tracer *t = v;
4951
4952         if (!t)
4953                 return 0;
4954
4955         seq_puts(m, t->name);
4956         if (t->next)
4957                 seq_putc(m, ' ');
4958         else
4959                 seq_putc(m, '\n');
4960
4961         return 0;
4962 }
4963
4964 static const struct seq_operations show_traces_seq_ops = {
4965         .start          = t_start,
4966         .next           = t_next,
4967         .stop           = t_stop,
4968         .show           = t_show,
4969 };
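/*
 * These seq_operations back the "available_tracers" file: t_start()/t_next()
 * walk the registered trace_types list (skipping tracers not allowed for
 * this instance) and t_show() prints the names space separated, so a read
 * might return something like:
 *
 *	# cat available_tracers
 *	function_graph function nop
 *
 * (the exact list depends on the kernel configuration).
 */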
4970
4971 static int show_traces_open(struct inode *inode, struct file *file)
4972 {
4973         struct trace_array *tr = inode->i_private;
4974         struct seq_file *m;
4975         int ret;
4976
4977         ret = tracing_check_open_get_tr(tr);
4978         if (ret)
4979                 return ret;
4980
4981         ret = seq_open(file, &show_traces_seq_ops);
4982         if (ret) {
4983                 trace_array_put(tr);
4984                 return ret;
4985         }
4986
4987         m = file->private_data;
4988         m->private = tr;
4989
4990         return 0;
4991 }
4992
4993 static int show_traces_release(struct inode *inode, struct file *file)
4994 {
4995         struct trace_array *tr = inode->i_private;
4996
4997         trace_array_put(tr);
4998         return seq_release(inode, file);
4999 }
5000
5001 static ssize_t
5002 tracing_write_stub(struct file *filp, const char __user *ubuf,
5003                    size_t count, loff_t *ppos)
5004 {
5005         return count;
5006 }
5007
5008 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5009 {
5010         int ret;
5011
5012         if (file->f_mode & FMODE_READ)
5013                 ret = seq_lseek(file, offset, whence);
5014         else
5015                 file->f_pos = ret = 0;
5016
5017         return ret;
5018 }
5019
5020 static const struct file_operations tracing_fops = {
5021         .open           = tracing_open,
5022         .read           = seq_read,
5023         .write          = tracing_write_stub,
5024         .llseek         = tracing_lseek,
5025         .release        = tracing_release,
5026 };
5027
5028 static const struct file_operations show_traces_fops = {
5029         .open           = show_traces_open,
5030         .read           = seq_read,
5031         .llseek         = seq_lseek,
5032         .release        = show_traces_release,
5033 };
5034
5035 static ssize_t
5036 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5037                      size_t count, loff_t *ppos)
5038 {
5039         struct trace_array *tr = file_inode(filp)->i_private;
5040         char *mask_str;
5041         int len;
5042
5043         len = snprintf(NULL, 0, "%*pb\n",
5044                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5045         mask_str = kmalloc(len, GFP_KERNEL);
5046         if (!mask_str)
5047                 return -ENOMEM;
5048
5049         len = snprintf(mask_str, len, "%*pb\n",
5050                        cpumask_pr_args(tr->tracing_cpumask));
5051         if (len >= count) {
5052                 count = -EINVAL;
5053                 goto out_err;
5054         }
5055         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5056
5057 out_err:
5058         kfree(mask_str);
5059
5060         return count;
5061 }
5062
5063 int tracing_set_cpumask(struct trace_array *tr,
5064                         cpumask_var_t tracing_cpumask_new)
5065 {
5066         int cpu;
5067
5068         if (!tr)
5069                 return -EINVAL;
5070
5071         local_irq_disable();
5072         arch_spin_lock(&tr->max_lock);
5073         for_each_tracing_cpu(cpu) {
5074                 /*
5075                  * Increase/decrease the disabled counter if we are
5076                  * about to flip a bit in the cpumask:
5077                  */
5078                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5079                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5080                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5081                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5082                 }
5083                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5084                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5085                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5086                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5087                 }
5088         }
5089         arch_spin_unlock(&tr->max_lock);
5090         local_irq_enable();
5091
5092         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5093
5094         return 0;
5095 }
5096
5097 static ssize_t
5098 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5099                       size_t count, loff_t *ppos)
5100 {
5101         struct trace_array *tr = file_inode(filp)->i_private;
5102         cpumask_var_t tracing_cpumask_new;
5103         int err;
5104
5105         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5106                 return -ENOMEM;
5107
5108         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5109         if (err)
5110                 goto err_free;
5111
5112         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5113         if (err)
5114                 goto err_free;
5115
5116         free_cpumask_var(tracing_cpumask_new);
5117
5118         return count;
5119
5120 err_free:
5121         free_cpumask_var(tracing_cpumask_new);
5122
5123         return err;
5124 }
5125
5126 static const struct file_operations tracing_cpumask_fops = {
5127         .open           = tracing_open_generic_tr,
5128         .read           = tracing_cpumask_read,
5129         .write          = tracing_cpumask_write,
5130         .release        = tracing_release_generic_tr,
5131         .llseek         = generic_file_llseek,
5132 };
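/*
 * Usage sketch for the "tracing_cpumask" file (a hex CPU mask, parsed by
 * cpumask_parse_user() above). For example, to limit tracing to CPUs 0-3:
 *
 *	# echo f > tracing_cpumask
 *
 * Reading the file back prints the current mask via the "%*pb" format.
 */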
5133
5134 static int tracing_trace_options_show(struct seq_file *m, void *v)
5135 {
5136         struct tracer_opt *trace_opts;
5137         struct trace_array *tr = m->private;
5138         u32 tracer_flags;
5139         int i;
5140
5141         mutex_lock(&trace_types_lock);
5142         tracer_flags = tr->current_trace->flags->val;
5143         trace_opts = tr->current_trace->flags->opts;
5144
5145         for (i = 0; trace_options[i]; i++) {
5146                 if (tr->trace_flags & (1 << i))
5147                         seq_printf(m, "%s\n", trace_options[i]);
5148                 else
5149                         seq_printf(m, "no%s\n", trace_options[i]);
5150         }
5151
5152         for (i = 0; trace_opts[i].name; i++) {
5153                 if (tracer_flags & trace_opts[i].bit)
5154                         seq_printf(m, "%s\n", trace_opts[i].name);
5155                 else
5156                         seq_printf(m, "no%s\n", trace_opts[i].name);
5157         }
5158         mutex_unlock(&trace_types_lock);
5159
5160         return 0;
5161 }
5162
5163 static int __set_tracer_option(struct trace_array *tr,
5164                                struct tracer_flags *tracer_flags,
5165                                struct tracer_opt *opts, int neg)
5166 {
5167         struct tracer *trace = tracer_flags->trace;
5168         int ret;
5169
5170         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5171         if (ret)
5172                 return ret;
5173
5174         if (neg)
5175                 tracer_flags->val &= ~opts->bit;
5176         else
5177                 tracer_flags->val |= opts->bit;
5178         return 0;
5179 }
5180
5181 /* Try to assign a tracer-specific option */
5182 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5183 {
5184         struct tracer *trace = tr->current_trace;
5185         struct tracer_flags *tracer_flags = trace->flags;
5186         struct tracer_opt *opts = NULL;
5187         int i;
5188
5189         for (i = 0; tracer_flags->opts[i].name; i++) {
5190                 opts = &tracer_flags->opts[i];
5191
5192                 if (strcmp(cmp, opts->name) == 0)
5193                         return __set_tracer_option(tr, trace->flags, opts, neg);
5194         }
5195
5196         return -EINVAL;
5197 }
5198
5199 /* Some tracers require overwrite to stay enabled */
5200 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5201 {
5202         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5203                 return -1;
5204
5205         return 0;
5206 }
5207
5208 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5209 {
5210         int *map;
5211
5212         if ((mask == TRACE_ITER_RECORD_TGID) ||
5213             (mask == TRACE_ITER_RECORD_CMD))
5214                 lockdep_assert_held(&event_mutex);
5215
5216         /* do nothing if flag is already set */
5217         if (!!(tr->trace_flags & mask) == !!enabled)
5218                 return 0;
5219
5220         /* Give the tracer a chance to approve the change */
5221         if (tr->current_trace->flag_changed)
5222                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5223                         return -EINVAL;
5224
5225         if (enabled)
5226                 tr->trace_flags |= mask;
5227         else
5228                 tr->trace_flags &= ~mask;
5229
5230         if (mask == TRACE_ITER_RECORD_CMD)
5231                 trace_event_enable_cmd_record(enabled);
5232
5233         if (mask == TRACE_ITER_RECORD_TGID) {
5234                 if (!tgid_map) {
5235                         tgid_map_max = pid_max;
5236                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5237                                        GFP_KERNEL);
5238
5239                         /*
5240                          * Pairs with smp_load_acquire() in
5241                          * trace_find_tgid_ptr() to ensure that if it observes
5242                          * the tgid_map we just allocated then it also observes
5243                          * the corresponding tgid_map_max value.
5244                          */
5245                         smp_store_release(&tgid_map, map);
5246                 }
5247                 if (!tgid_map) {
5248                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5249                         return -ENOMEM;
5250                 }
5251
5252                 trace_event_enable_tgid_record(enabled);
5253         }
5254
5255         if (mask == TRACE_ITER_EVENT_FORK)
5256                 trace_event_follow_fork(tr, enabled);
5257
5258         if (mask == TRACE_ITER_FUNC_FORK)
5259                 ftrace_pid_follow_fork(tr, enabled);
5260
5261         if (mask == TRACE_ITER_OVERWRITE) {
5262                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5263 #ifdef CONFIG_TRACER_MAX_TRACE
5264                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5265 #endif
5266         }
5267
5268         if (mask == TRACE_ITER_PRINTK) {
5269                 trace_printk_start_stop_comm(enabled);
5270                 trace_printk_control(enabled);
5271         }
5272
5273         return 0;
5274 }
5275
5276 int trace_set_options(struct trace_array *tr, char *option)
5277 {
5278         char *cmp;
5279         int neg = 0;
5280         int ret;
5281         size_t orig_len = strlen(option);
5282         int len;
5283
5284         cmp = strstrip(option);
5285
5286         len = str_has_prefix(cmp, "no");
5287         if (len)
5288                 neg = 1;
5289
5290         cmp += len;
5291
5292         mutex_lock(&event_mutex);
5293         mutex_lock(&trace_types_lock);
5294
5295         ret = match_string(trace_options, -1, cmp);
5296         /* If the name is not a global flag, test the tracer-specific options */
5297         if (ret < 0)
5298                 ret = set_tracer_option(tr, cmp, neg);
5299         else
5300                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5301
5302         mutex_unlock(&trace_types_lock);
5303         mutex_unlock(&event_mutex);
5304
5305         /*
5306          * If the first trailing whitespace is replaced with '\0' by strstrip,
5307          * turn it back into a space.
5308          */
5309         if (orig_len > strlen(option))
5310                 option[strlen(option)] = ' ';
5311
5312         return ret;
5313 }
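/*
 * Example of the option syntax handled above: a flag name sets the bit and
 * the "no" prefix clears it, e.g.:
 *
 *	# echo sym-offset > trace_options
 *	# echo nosym-offset > trace_options
 *
 * Names that are not global flags fall through to the current tracer's
 * private options via set_tracer_option().
 */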
5314
5315 static void __init apply_trace_boot_options(void)
5316 {
5317         char *buf = trace_boot_options_buf;
5318         char *option;
5319
5320         while (true) {
5321                 option = strsep(&buf, ",");
5322
5323                 if (!option)
5324                         break;
5325
5326                 if (*option)
5327                         trace_set_options(&global_trace, option);
5328
5329                 /* Put back the comma to allow this to be called again */
5330                 if (buf)
5331                         *(buf - 1) = ',';
5332         }
5333 }
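/*
 * The buffer parsed here is filled from the "trace_options=" kernel command
 * line parameter, e.g. booting with something like:
 *
 *	trace_options=sym-offset,noirq-info
 *
 * applies each comma separated option to the global trace array at boot.
 */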
5334
5335 static ssize_t
5336 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5337                         size_t cnt, loff_t *ppos)
5338 {
5339         struct seq_file *m = filp->private_data;
5340         struct trace_array *tr = m->private;
5341         char buf[64];
5342         int ret;
5343
5344         if (cnt >= sizeof(buf))
5345                 return -EINVAL;
5346
5347         if (copy_from_user(buf, ubuf, cnt))
5348                 return -EFAULT;
5349
5350         buf[cnt] = 0;
5351
5352         ret = trace_set_options(tr, buf);
5353         if (ret < 0)
5354                 return ret;
5355
5356         *ppos += cnt;
5357
5358         return cnt;
5359 }
5360
5361 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5362 {
5363         struct trace_array *tr = inode->i_private;
5364         int ret;
5365
5366         ret = tracing_check_open_get_tr(tr);
5367         if (ret)
5368                 return ret;
5369
5370         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5371         if (ret < 0)
5372                 trace_array_put(tr);
5373
5374         return ret;
5375 }
5376
5377 static const struct file_operations tracing_iter_fops = {
5378         .open           = tracing_trace_options_open,
5379         .read           = seq_read,
5380         .llseek         = seq_lseek,
5381         .release        = tracing_single_release_tr,
5382         .write          = tracing_trace_options_write,
5383 };
5384
5385 static const char readme_msg[] =
5386         "tracing mini-HOWTO:\n\n"
5387         "# echo 0 > tracing_on : quick way to disable tracing\n"
5388         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5389         " Important files:\n"
5390         "  trace\t\t\t- The static contents of the buffer\n"
5391         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5392         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5393         "  current_tracer\t- function and latency tracers\n"
5394         "  available_tracers\t- list of configured tracers for current_tracer\n"
5395         "  error_log\t- error log for failed commands (that support it)\n"
5396         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5397         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5398         "  trace_clock\t\t-change the clock used to order events\n"
5399         "       local:   Per cpu clock but may not be synced across CPUs\n"
5400         "      global:   Synced across CPUs but slows tracing down.\n"
5401         "     counter:   Not a clock, but just an increment\n"
5402         "      uptime:   Jiffy counter from time of boot\n"
5403         "        perf:   Same clock that perf events use\n"
5404 #ifdef CONFIG_X86_64
5405         "     x86-tsc:   TSC cycle counter\n"
5406 #endif
5407         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5408         "       delta:   Delta difference against a buffer-wide timestamp\n"
5409         "    absolute:   Absolute (standalone) timestamp\n"
5410         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5411         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5412         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5413         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5414         "\t\t\t  Remove sub-buffer with rmdir\n"
5415         "  trace_options\t\t- Set format or modify how tracing happens\n"
5416         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5417         "\t\t\t  option name\n"
5418         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5419 #ifdef CONFIG_DYNAMIC_FTRACE
5420         "\n  available_filter_functions - list of functions that can be filtered on\n"
5421         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5422         "\t\t\t  functions\n"
5423         "\t     accepts: func_full_name or glob-matching-pattern\n"
5424         "\t     modules: Can select a group via module\n"
5425         "\t      Format: :mod:<module-name>\n"
5426         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5427         "\t    triggers: a command to perform when function is hit\n"
5428         "\t      Format: <function>:<trigger>[:count]\n"
5429         "\t     trigger: traceon, traceoff\n"
5430         "\t\t      enable_event:<system>:<event>\n"
5431         "\t\t      disable_event:<system>:<event>\n"
5432 #ifdef CONFIG_STACKTRACE
5433         "\t\t      stacktrace\n"
5434 #endif
5435 #ifdef CONFIG_TRACER_SNAPSHOT
5436         "\t\t      snapshot\n"
5437 #endif
5438         "\t\t      dump\n"
5439         "\t\t      cpudump\n"
5440         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5441         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5442         "\t     The first one will disable tracing every time do_fault is hit\n"
5443         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5444         "\t       The first time do trap is hit and it disables tracing, the\n"
5445         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5446         "\t       the counter will not decrement. It only decrements when the\n"
5447         "\t       trigger did work\n"
5448         "\t     To remove trigger without count:\n"
5449         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5450         "\t     To remove trigger with a count:\n"
5451         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5452         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5453         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5454         "\t    modules: Can select a group via module command :mod:\n"
5455         "\t    Does not accept triggers\n"
5456 #endif /* CONFIG_DYNAMIC_FTRACE */
5457 #ifdef CONFIG_FUNCTION_TRACER
5458         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5459         "\t\t    (function)\n"
5460         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5461         "\t\t    (function)\n"
5462 #endif
5463 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5464         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5465         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5466         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5467 #endif
5468 #ifdef CONFIG_TRACER_SNAPSHOT
5469         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5470         "\t\t\t  snapshot buffer. Read the contents for more\n"
5471         "\t\t\t  information\n"
5472 #endif
5473 #ifdef CONFIG_STACK_TRACER
5474         "  stack_trace\t\t- Shows the max stack trace when active\n"
5475         "  stack_max_size\t- Shows current max stack size that was traced\n"
5476         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5477         "\t\t\t  new trace)\n"
5478 #ifdef CONFIG_DYNAMIC_FTRACE
5479         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5480         "\t\t\t  traces\n"
5481 #endif
5482 #endif /* CONFIG_STACK_TRACER */
5483 #ifdef CONFIG_DYNAMIC_EVENTS
5484         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5485         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5486 #endif
5487 #ifdef CONFIG_KPROBE_EVENTS
5488         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5489         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5490 #endif
5491 #ifdef CONFIG_UPROBE_EVENTS
5492         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5493         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5494 #endif
5495 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5496         "\t  accepts: event-definitions (one definition per line)\n"
5497         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5498         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5499 #ifdef CONFIG_HIST_TRIGGERS
5500         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5501 #endif
5502         "\t           -:[<group>/]<event>\n"
5503 #ifdef CONFIG_KPROBE_EVENTS
5504         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5505   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5506 #endif
5507 #ifdef CONFIG_UPROBE_EVENTS
5508   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5509 #endif
5510         "\t     args: <name>=fetcharg[:type]\n"
5511         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5512 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5513         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5514 #else
5515         "\t           $stack<index>, $stack, $retval, $comm,\n"
5516 #endif
5517         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5518         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5519         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5520         "\t           <type>\\[<array-size>\\]\n"
5521 #ifdef CONFIG_HIST_TRIGGERS
5522         "\t    field: <stype> <name>;\n"
5523         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5524         "\t           [unsigned] char/int/long\n"
5525 #endif
5526 #endif
5527         "  events/\t\t- Directory containing all trace event subsystems:\n"
5528         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5529         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5530         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5531         "\t\t\t  events\n"
5532         "      filter\t\t- If set, only events passing filter are traced\n"
5533         "  events/<system>/<event>/\t- Directory containing control files for\n"
5534         "\t\t\t  <event>:\n"
5535         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5536         "      filter\t\t- If set, only events passing filter are traced\n"
5537         "      trigger\t\t- If set, a command to perform when event is hit\n"
5538         "\t    Format: <trigger>[:count][if <filter>]\n"
5539         "\t   trigger: traceon, traceoff\n"
5540         "\t            enable_event:<system>:<event>\n"
5541         "\t            disable_event:<system>:<event>\n"
5542 #ifdef CONFIG_HIST_TRIGGERS
5543         "\t            enable_hist:<system>:<event>\n"
5544         "\t            disable_hist:<system>:<event>\n"
5545 #endif
5546 #ifdef CONFIG_STACKTRACE
5547         "\t\t    stacktrace\n"
5548 #endif
5549 #ifdef CONFIG_TRACER_SNAPSHOT
5550         "\t\t    snapshot\n"
5551 #endif
5552 #ifdef CONFIG_HIST_TRIGGERS
5553         "\t\t    hist (see below)\n"
5554 #endif
5555         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5556         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5557         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5558         "\t                  events/block/block_unplug/trigger\n"
5559         "\t   The first disables tracing every time block_unplug is hit.\n"
5560         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5561         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5562         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5563         "\t   Like function triggers, the counter is only decremented if it\n"
5564         "\t    enabled or disabled tracing.\n"
5565         "\t   To remove a trigger without a count:\n"
5566         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5567         "\t   To remove a trigger with a count:\n"
5568         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5569         "\t   Filters can be ignored when removing a trigger.\n"
5570 #ifdef CONFIG_HIST_TRIGGERS
5571         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5572         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5573         "\t            [:values=<field1[,field2,...]>]\n"
5574         "\t            [:sort=<field1[,field2,...]>]\n"
5575         "\t            [:size=#entries]\n"
5576         "\t            [:pause][:continue][:clear]\n"
5577         "\t            [:name=histname1]\n"
5578         "\t            [:<handler>.<action>]\n"
5579         "\t            [if <filter>]\n\n"
5580         "\t    Note, special fields can be used as well:\n"
5581         "\t            common_timestamp - to record current timestamp\n"
5582         "\t            common_cpu - to record the CPU the event happened on\n"
5583         "\n"
5584         "\t    When a matching event is hit, an entry is added to a hash\n"
5585         "\t    table using the key(s) and value(s) named, and the value of a\n"
5586         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5587         "\t    correspond to fields in the event's format description.  Keys\n"
5588         "\t    can be any field, or the special string 'stacktrace'.\n"
5589         "\t    Compound keys consisting of up to two fields can be specified\n"
5590         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5591         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5592         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5593         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5594         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5595         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5596         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5597         "\t    its histogram data will be shared with other triggers of the\n"
5598         "\t    same name, and trigger hits will update this common data.\n\n"
5599         "\t    Reading the 'hist' file for the event will dump the hash\n"
5600         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5601         "\t    triggers attached to an event, there will be a table for each\n"
5602         "\t    trigger in the output.  The table displayed for a named\n"
5603         "\t    trigger will be the same as any other instance having the\n"
5604         "\t    same name.  The default format used to display a given field\n"
5605         "\t    can be modified by appending any of the following modifiers\n"
5606         "\t    to the field name, as applicable:\n\n"
5607         "\t            .hex        display a number as a hex value\n"
5608         "\t            .sym        display an address as a symbol\n"
5609         "\t            .sym-offset display an address as a symbol and offset\n"
5610         "\t            .execname   display a common_pid as a program name\n"
5611         "\t            .syscall    display a syscall id as a syscall name\n"
5612         "\t            .log2       display log2 value rather than raw number\n"
5613         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5614         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5615         "\t    trigger or to start a hist trigger but not log any events\n"
5616         "\t    until told to do so.  'continue' can be used to start or\n"
5617         "\t    restart a paused hist trigger.\n\n"
5618         "\t    The 'clear' parameter will clear the contents of a running\n"
5619         "\t    hist trigger and leave its current paused/active state\n"
5620         "\t    unchanged.\n\n"
5621         "\t    The enable_hist and disable_hist triggers can be used to\n"
5622         "\t    have one event conditionally start and stop another event's\n"
5623         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5624         "\t    the enable_event and disable_event triggers.\n\n"
5625         "\t    Hist trigger handlers and actions are executed whenever a\n"
5626         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5627         "\t        <handler>.<action>\n\n"
5628         "\t    The available handlers are:\n\n"
5629         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5630         "\t        onmax(var)               - invoke if var exceeds current max\n"
5631         "\t        onchange(var)            - invoke action if var changes\n\n"
5632         "\t    The available actions are:\n\n"
5633         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5634         "\t        save(field,...)                      - save current event fields\n"
5635 #ifdef CONFIG_TRACER_SNAPSHOT
5636         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5637 #endif
5638 #ifdef CONFIG_SYNTH_EVENTS
5639         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5640         "\t  Write into this file to define/undefine new synthetic events.\n"
5641         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5642 #endif
5643 #endif
5644 ;
5645
5646 static ssize_t
5647 tracing_readme_read(struct file *filp, char __user *ubuf,
5648                        size_t cnt, loff_t *ppos)
5649 {
5650         return simple_read_from_buffer(ubuf, cnt, ppos,
5651                                         readme_msg, strlen(readme_msg));
5652 }
5653
5654 static const struct file_operations tracing_readme_fops = {
5655         .open           = tracing_open_generic,
5656         .read           = tracing_readme_read,
5657         .llseek         = generic_file_llseek,
5658 };
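/*
 * The readme_msg text above is exposed read-only through the "README" file
 * in the tracefs root, e.g.:
 *
 *	# cat /sys/kernel/tracing/README
 */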
5659
5660 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5661 {
5662         int pid = ++(*pos);
5663
5664         return trace_find_tgid_ptr(pid);
5665 }
5666
5667 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5668 {
5669         int pid = *pos;
5670
5671         return trace_find_tgid_ptr(pid);
5672 }
5673
5674 static void saved_tgids_stop(struct seq_file *m, void *v)
5675 {
5676 }
5677
5678 static int saved_tgids_show(struct seq_file *m, void *v)
5679 {
5680         int *entry = (int *)v;
5681         int pid = entry - tgid_map;
5682         int tgid = *entry;
5683
5684         if (tgid == 0)
5685                 return SEQ_SKIP;
5686
5687         seq_printf(m, "%d %d\n", pid, tgid);
5688         return 0;
5689 }
5690
5691 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5692         .start          = saved_tgids_start,
5693         .stop           = saved_tgids_stop,
5694         .next           = saved_tgids_next,
5695         .show           = saved_tgids_show,
5696 };
5697
5698 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5699 {
5700         int ret;
5701
5702         ret = tracing_check_open_get_tr(NULL);
5703         if (ret)
5704                 return ret;
5705
5706         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5707 }
5708
5709
5710 static const struct file_operations tracing_saved_tgids_fops = {
5711         .open           = tracing_saved_tgids_open,
5712         .read           = seq_read,
5713         .llseek         = seq_lseek,
5714         .release        = seq_release,
5715 };
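/*
 * Example: once the "record-tgid" option is set, the PID->TGID mapping
 * collected in tgid_map can be dumped as "pid tgid" pairs:
 *
 *	# echo 1 > options/record-tgid
 *	# cat saved_tgids
 *	1023 1023
 *	1024 1023
 *
 * (sample values only; zero entries are skipped by saved_tgids_show()).
 */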
5716
5717 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5718 {
5719         unsigned int *ptr = v;
5720
5721         if (*pos || m->count)
5722                 ptr++;
5723
5724         (*pos)++;
5725
5726         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5727              ptr++) {
5728                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5729                         continue;
5730
5731                 return ptr;
5732         }
5733
5734         return NULL;
5735 }
5736
5737 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5738 {
5739         void *v;
5740         loff_t l = 0;
5741
5742         preempt_disable();
5743         arch_spin_lock(&trace_cmdline_lock);
5744
5745         v = &savedcmd->map_cmdline_to_pid[0];
5746         while (l <= *pos) {
5747                 v = saved_cmdlines_next(m, v, &l);
5748                 if (!v)
5749                         return NULL;
5750         }
5751
5752         return v;
5753 }
5754
5755 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5756 {
5757         arch_spin_unlock(&trace_cmdline_lock);
5758         preempt_enable();
5759 }
5760
5761 static int saved_cmdlines_show(struct seq_file *m, void *v)
5762 {
5763         char buf[TASK_COMM_LEN];
5764         unsigned int *pid = v;
5765
5766         __trace_find_cmdline(*pid, buf);
5767         seq_printf(m, "%d %s\n", *pid, buf);
5768         return 0;
5769 }
5770
5771 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5772         .start          = saved_cmdlines_start,
5773         .next           = saved_cmdlines_next,
5774         .stop           = saved_cmdlines_stop,
5775         .show           = saved_cmdlines_show,
5776 };
5777
5778 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5779 {
5780         int ret;
5781
5782         ret = tracing_check_open_get_tr(NULL);
5783         if (ret)
5784                 return ret;
5785
5786         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5787 }
5788
5789 static const struct file_operations tracing_saved_cmdlines_fops = {
5790         .open           = tracing_saved_cmdlines_open,
5791         .read           = seq_read,
5792         .llseek         = seq_lseek,
5793         .release        = seq_release,
5794 };
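/*
 * Example: "saved_cmdlines" lists the cached pid -> comm mappings printed by
 * saved_cmdlines_show() above, one "pid comm" pair per line, e.g.:
 *
 *	# cat saved_cmdlines
 *	25 kworker/1:1
 *	1339 bash
 *
 * (sample values only).
 */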
5795
5796 static ssize_t
5797 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5798                                  size_t cnt, loff_t *ppos)
5799 {
5800         char buf[64];
5801         int r;
5802
5803         arch_spin_lock(&trace_cmdline_lock);
5804         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5805         arch_spin_unlock(&trace_cmdline_lock);
5806
5807         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5808 }
5809
5810 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5811 {
5812         kfree(s->saved_cmdlines);
5813         kfree(s->map_cmdline_to_pid);
5814         kfree(s);
5815 }
5816
5817 static int tracing_resize_saved_cmdlines(unsigned int val)
5818 {
5819         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5820
5821         s = kmalloc(sizeof(*s), GFP_KERNEL);
5822         if (!s)
5823                 return -ENOMEM;
5824
5825         if (allocate_cmdlines_buffer(val, s) < 0) {
5826                 kfree(s);
5827                 return -ENOMEM;
5828         }
5829
5830         arch_spin_lock(&trace_cmdline_lock);
5831         savedcmd_temp = savedcmd;
5832         savedcmd = s;
5833         arch_spin_unlock(&trace_cmdline_lock);
5834         free_saved_cmdlines_buffer(savedcmd_temp);
5835
5836         return 0;
5837 }
5838
5839 static ssize_t
5840 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5841                                   size_t cnt, loff_t *ppos)
5842 {
5843         unsigned long val;
5844         int ret;
5845
5846         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5847         if (ret)
5848                 return ret;
5849
5850         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5851         if (!val || val > PID_MAX_DEFAULT)
5852                 return -EINVAL;
5853
5854         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5855         if (ret < 0)
5856                 return ret;
5857
5858         *ppos += cnt;
5859
5860         return cnt;
5861 }
5862
5863 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5864         .open           = tracing_open_generic,
5865         .read           = tracing_saved_cmdlines_size_read,
5866         .write          = tracing_saved_cmdlines_size_write,
5867 };
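/*
 * Example: the number of cached cmdline entries can be resized at runtime;
 * as checked in the write handler above, the value must be between 1 and
 * PID_MAX_DEFAULT:
 *
 *	# echo 1024 > saved_cmdlines_size
 *
 * Reading the file returns the current number of entries.
 */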
5868
5869 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5870 static union trace_eval_map_item *
5871 update_eval_map(union trace_eval_map_item *ptr)
5872 {
5873         if (!ptr->map.eval_string) {
5874                 if (ptr->tail.next) {
5875                         ptr = ptr->tail.next;
5876                         /* Set ptr to the next real item (skip head) */
5877                         ptr++;
5878                 } else
5879                         return NULL;
5880         }
5881         return ptr;
5882 }
5883
5884 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5885 {
5886         union trace_eval_map_item *ptr = v;
5887
5888         /*
5889          * Paranoid! If ptr points to end, we don't want to increment past it.
5890          * This really should never happen.
5891          */
5892         (*pos)++;
5893         ptr = update_eval_map(ptr);
5894         if (WARN_ON_ONCE(!ptr))
5895                 return NULL;
5896
5897         ptr++;
5898         ptr = update_eval_map(ptr);
5899
5900         return ptr;
5901 }
5902
5903 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5904 {
5905         union trace_eval_map_item *v;
5906         loff_t l = 0;
5907
5908         mutex_lock(&trace_eval_mutex);
5909
5910         v = trace_eval_maps;
5911         if (v)
5912                 v++;
5913
5914         while (v && l < *pos) {
5915                 v = eval_map_next(m, v, &l);
5916         }
5917
5918         return v;
5919 }
5920
5921 static void eval_map_stop(struct seq_file *m, void *v)
5922 {
5923         mutex_unlock(&trace_eval_mutex);
5924 }
5925
5926 static int eval_map_show(struct seq_file *m, void *v)
5927 {
5928         union trace_eval_map_item *ptr = v;
5929
5930         seq_printf(m, "%s %ld (%s)\n",
5931                    ptr->map.eval_string, ptr->map.eval_value,
5932                    ptr->map.system);
5933
5934         return 0;
5935 }
5936
5937 static const struct seq_operations tracing_eval_map_seq_ops = {
5938         .start          = eval_map_start,
5939         .next           = eval_map_next,
5940         .stop           = eval_map_stop,
5941         .show           = eval_map_show,
5942 };
5943
5944 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5945 {
5946         int ret;
5947
5948         ret = tracing_check_open_get_tr(NULL);
5949         if (ret)
5950                 return ret;
5951
5952         return seq_open(filp, &tracing_eval_map_seq_ops);
5953 }
5954
5955 static const struct file_operations tracing_eval_map_fops = {
5956         .open           = tracing_eval_map_open,
5957         .read           = seq_read,
5958         .llseek         = seq_lseek,
5959         .release        = seq_release,
5960 };
5961
5962 static inline union trace_eval_map_item *
5963 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5964 {
5965         /* Return tail of array given the head */
5966         return ptr + ptr->head.length + 1;
5967 }
5968
5969 static void
5970 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5971                            int len)
5972 {
5973         struct trace_eval_map **stop;
5974         struct trace_eval_map **map;
5975         union trace_eval_map_item *map_array;
5976         union trace_eval_map_item *ptr;
5977
5978         stop = start + len;
5979
5980         /*
5981          * The trace_eval_maps contains the map plus a head and tail item,
5982          * where the head holds the module and length of array, and the
5983          * tail holds a pointer to the next list.
5984          */
5985         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5986         if (!map_array) {
5987                 pr_warn("Unable to allocate trace eval mapping\n");
5988                 return;
5989         }
5990
5991         mutex_lock(&trace_eval_mutex);
5992
5993         if (!trace_eval_maps)
5994                 trace_eval_maps = map_array;
5995         else {
5996                 ptr = trace_eval_maps;
5997                 for (;;) {
5998                         ptr = trace_eval_jmp_to_tail(ptr);
5999                         if (!ptr->tail.next)
6000                                 break;
6001                         ptr = ptr->tail.next;
6002
6003                 }
6004                 ptr->tail.next = map_array;
6005         }
6006         map_array->head.mod = mod;
6007         map_array->head.length = len;
6008         map_array++;
6009
6010         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6011                 map_array->map = **map;
6012                 map_array++;
6013         }
6014         memset(map_array, 0, sizeof(*map_array));
6015
6016         mutex_unlock(&trace_eval_mutex);
6017 }
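/*
 * Layout sketch of one chunk appended above (len maps from one module):
 *
 *	map_array[0]		head  { .mod, .length = len }
 *	map_array[1..len]	map   one trace_eval_map per entry
 *	map_array[len + 1]	tail  zeroed here; tail.next links the next chunk
 *
 * trace_eval_jmp_to_tail() uses head.length to hop from a chunk's head to
 * its tail when walking the list.
 */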
6018
6019 static void trace_create_eval_file(struct dentry *d_tracer)
6020 {
6021         trace_create_file("eval_map", 0444, d_tracer,
6022                           NULL, &tracing_eval_map_fops);
6023 }
6024
6025 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6026 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6027 static inline void trace_insert_eval_map_file(struct module *mod,
6028                               struct trace_eval_map **start, int len) { }
6029 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6030
6031 static void trace_insert_eval_map(struct module *mod,
6032                                   struct trace_eval_map **start, int len)
6033 {
6034         struct trace_eval_map **map;
6035
6036         if (len <= 0)
6037                 return;
6038
6039         map = start;
6040
6041         trace_event_eval_update(map, len);
6042
6043         trace_insert_eval_map_file(mod, start, len);
6044 }
6045
6046 static ssize_t
6047 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6048                        size_t cnt, loff_t *ppos)
6049 {
6050         struct trace_array *tr = filp->private_data;
6051         char buf[MAX_TRACER_SIZE+2];
6052         int r;
6053
6054         mutex_lock(&trace_types_lock);
6055         r = sprintf(buf, "%s\n", tr->current_trace->name);
6056         mutex_unlock(&trace_types_lock);
6057
6058         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6059 }
6060
6061 int tracer_init(struct tracer *t, struct trace_array *tr)
6062 {
6063         tracing_reset_online_cpus(&tr->array_buffer);
6064         return t->init(tr);
6065 }
6066
6067 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6068 {
6069         int cpu;
6070
6071         for_each_tracing_cpu(cpu)
6072                 per_cpu_ptr(buf->data, cpu)->entries = val;
6073 }
6074
6075 #ifdef CONFIG_TRACER_MAX_TRACE
6076 /* resize @trace_buf's per-CPU entries to match those of @size_buf */
6077 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6078                                         struct array_buffer *size_buf, int cpu_id)
6079 {
6080         int cpu, ret = 0;
6081
6082         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6083                 for_each_tracing_cpu(cpu) {
6084                         ret = ring_buffer_resize(trace_buf->buffer,
6085                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6086                         if (ret < 0)
6087                                 break;
6088                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6089                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6090                 }
6091         } else {
6092                 ret = ring_buffer_resize(trace_buf->buffer,
6093                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6094                 if (ret == 0)
6095                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6096                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6097         }
6098
6099         return ret;
6100 }
6101 #endif /* CONFIG_TRACER_MAX_TRACE */
6102
6103 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6104                                         unsigned long size, int cpu)
6105 {
6106         int ret;
6107
6108         /*
6109          * If kernel or user changes the size of the ring buffer
6110          * we use the size that was given, and we can forget about
6111          * expanding it later.
6112          */
6113         ring_buffer_expanded = true;
6114
6115         /* May be called before buffers are initialized */
6116         if (!tr->array_buffer.buffer)
6117                 return 0;
6118
6119         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6120         if (ret < 0)
6121                 return ret;
6122
6123 #ifdef CONFIG_TRACER_MAX_TRACE
6124         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6125             !tr->current_trace->use_max_tr)
6126                 goto out;
6127
6128         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6129         if (ret < 0) {
6130                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6131                                                      &tr->array_buffer, cpu);
6132                 if (r < 0) {
6133                         /*
6134                          * AARGH! We are left with different
6135                          * size max buffer!!!!
6136                          * The max buffer is our "snapshot" buffer.
6137                          * When a tracer needs a snapshot (one of the
6138                          * latency tracers), it swaps the max buffer
6139                          * with the saved snap shot. We succeeded to
6140                          * update the size of the main buffer, but failed to
6141                          * update the size of the max buffer. But when we tried
6142                          * to reset the main buffer to the original size, we
6143                          * failed there too. This is very unlikely to
6144                          * happen, but if it does, warn and kill all
6145                          * tracing.
6146                          */
6147                         WARN_ON(1);
6148                         tracing_disabled = 1;
6149                 }
6150                 return ret;
6151         }
6152
6153         if (cpu == RING_BUFFER_ALL_CPUS)
6154                 set_buffer_entries(&tr->max_buffer, size);
6155         else
6156                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6157
6158  out:
6159 #endif /* CONFIG_TRACER_MAX_TRACE */
6160
6161         if (cpu == RING_BUFFER_ALL_CPUS)
6162                 set_buffer_entries(&tr->array_buffer, size);
6163         else
6164                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6165
6166         return ret;
6167 }
6168
6169 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6170                                   unsigned long size, int cpu_id)
6171 {
6172         int ret = size;
6173
6174         mutex_lock(&trace_types_lock);
6175
6176         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6177                 /* make sure, this cpu is enabled in the mask */
6178                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6179                         ret = -EINVAL;
6180                         goto out;
6181                 }
6182         }
6183
6184         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6185         if (ret < 0)
6186                 ret = -ENOMEM;
6187
6188 out:
6189         mutex_unlock(&trace_types_lock);
6190
6191         return ret;
6192 }
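/*
 * From user space the resize is driven by the buffer_size_kb files (sizes
 * in KB, per CPU), e.g.:
 *
 *	# echo 4096 > buffer_size_kb
 *	# echo 1024 > per_cpu/cpu0/buffer_size_kb
 *
 * which end up here with either RING_BUFFER_ALL_CPUS or a specific CPU id.
 */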
6193
6194
6195 /**
6196  * tracing_update_buffers - used by tracing facility to expand ring buffers
6197  *
6198  * To save memory when tracing is never used on a system that has it
6199  * configured in, the ring buffers are set to a minimum size. But once
6200  * a user starts to use the tracing facility, they need to grow
6201  * to their default size.
6202  *
6203  * This function is to be called when a tracer is about to be used.
6204  */
6205 int tracing_update_buffers(void)
6206 {
6207         int ret = 0;
6208
6209         mutex_lock(&trace_types_lock);
6210         if (!ring_buffer_expanded)
6211                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6212                                                 RING_BUFFER_ALL_CPUS);
6213         mutex_unlock(&trace_types_lock);
6214
6215         return ret;
6216 }
6217
6218 struct trace_option_dentry;
6219
6220 static void
6221 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6222
6223 /*
6224  * Used to clear out the tracer before deletion of an instance.
6225  * Must have trace_types_lock held.
6226  */
6227 static void tracing_set_nop(struct trace_array *tr)
6228 {
6229         if (tr->current_trace == &nop_trace)
6230                 return;
6231
6232         tr->current_trace->enabled--;
6233
6234         if (tr->current_trace->reset)
6235                 tr->current_trace->reset(tr);
6236
6237         tr->current_trace = &nop_trace;
6238 }
6239
6240 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6241 {
6242         /* Only enable if the directory has been created already. */
6243         if (!tr->dir)
6244                 return;
6245
6246         create_trace_option_files(tr, t);
6247 }
6248
6249 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6250 {
6251         struct tracer *t;
6252 #ifdef CONFIG_TRACER_MAX_TRACE
6253         bool had_max_tr;
6254 #endif
6255         int ret = 0;
6256
6257         mutex_lock(&trace_types_lock);
6258
6259         if (!ring_buffer_expanded) {
6260                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6261                                                 RING_BUFFER_ALL_CPUS);
6262                 if (ret < 0)
6263                         goto out;
6264                 ret = 0;
6265         }
6266
6267         for (t = trace_types; t; t = t->next) {
6268                 if (strcmp(t->name, buf) == 0)
6269                         break;
6270         }
6271         if (!t) {
6272                 ret = -EINVAL;
6273                 goto out;
6274         }
6275         if (t == tr->current_trace)
6276                 goto out;
6277
6278 #ifdef CONFIG_TRACER_SNAPSHOT
6279         if (t->use_max_tr) {
6280                 arch_spin_lock(&tr->max_lock);
6281                 if (tr->cond_snapshot)
6282                         ret = -EBUSY;
6283                 arch_spin_unlock(&tr->max_lock);
6284                 if (ret)
6285                         goto out;
6286         }
6287 #endif
6288         /* Some tracers won't work on kernel command line */
6289         if (system_state < SYSTEM_RUNNING && t->noboot) {
6290                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6291                         t->name);
6292                 goto out;
6293         }
6294
6295         /* Some tracers are only allowed for the top level buffer */
6296         if (!trace_ok_for_array(t, tr)) {
6297                 ret = -EINVAL;
6298                 goto out;
6299         }
6300
6301         /* If trace pipe files are being read, we can't change the tracer */
6302         if (tr->trace_ref) {
6303                 ret = -EBUSY;
6304                 goto out;
6305         }
6306
6307         trace_branch_disable();
6308
6309         tr->current_trace->enabled--;
6310
6311         if (tr->current_trace->reset)
6312                 tr->current_trace->reset(tr);
6313
6314         /* Current trace needs to be nop_trace before synchronize_rcu */
6315         tr->current_trace = &nop_trace;
6316
6317 #ifdef CONFIG_TRACER_MAX_TRACE
6318         had_max_tr = tr->allocated_snapshot;
6319
6320         if (had_max_tr && !t->use_max_tr) {
6321                 /*
6322                  * We need to make sure that update_max_tr() sees that
6323                  * current_trace changed to nop_trace, to keep it from
6324                  * swapping the buffers after we resize it.
6325                  * update_max_tr() is called with interrupts disabled,
6326                  * so a synchronize_rcu() is sufficient.
6327                  */
6328                 synchronize_rcu();
6329                 free_snapshot(tr);
6330         }
6331 #endif
6332
6333 #ifdef CONFIG_TRACER_MAX_TRACE
6334         if (t->use_max_tr && !had_max_tr) {
6335                 ret = tracing_alloc_snapshot_instance(tr);
6336                 if (ret < 0)
6337                         goto out;
6338         }
6339 #endif
6340
6341         if (t->init) {
6342                 ret = tracer_init(t, tr);
6343                 if (ret)
6344                         goto out;
6345         }
6346
6347         tr->current_trace = t;
6348         tr->current_trace->enabled++;
6349         trace_branch_enable(tr);
6350  out:
6351         mutex_unlock(&trace_types_lock);
6352
6353         return ret;
6354 }
6355
6356 static ssize_t
6357 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6358                         size_t cnt, loff_t *ppos)
6359 {
6360         struct trace_array *tr = filp->private_data;
6361         char buf[MAX_TRACER_SIZE+1];
6362         int i;
6363         size_t ret;
6364         int err;
6365
6366         ret = cnt;
6367
6368         if (cnt > MAX_TRACER_SIZE)
6369                 cnt = MAX_TRACER_SIZE;
6370
6371         if (copy_from_user(buf, ubuf, cnt))
6372                 return -EFAULT;
6373
6374         buf[cnt] = 0;
6375
6376         /* strip trailing whitespace. */
6377         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6378                 buf[i] = 0;
6379
6380         err = tracing_set_tracer(tr, buf);
6381         if (err)
6382                 return err;
6383
6384         *ppos += ret;
6385
6386         return ret;
6387 }
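
/*
 * Example (an illustrative user-space sketch; assumes tracefs is mounted
 * at /sys/kernel/tracing and that the requested tracer is registered in
 * trace_types; "nop" always is). This is how the write handler above is
 * normally driven:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int set_tracer_nop(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "nop\n", 4) != 4) {	// trailing '\n' is stripped
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */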
6388
6389 static ssize_t
6390 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6391                    size_t cnt, loff_t *ppos)
6392 {
6393         char buf[64];
6394         int r;
6395
6396         r = snprintf(buf, sizeof(buf), "%ld\n",
6397                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6398         if (r > sizeof(buf))
6399                 r = sizeof(buf);
6400         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6401 }
6402
6403 static ssize_t
6404 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6405                     size_t cnt, loff_t *ppos)
6406 {
6407         unsigned long val;
6408         int ret;
6409
6410         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6411         if (ret)
6412                 return ret;
6413
6414         *ptr = val * 1000;
6415
6416         return cnt;
6417 }
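
/*
 * Worked example: the files backed by these helpers (tracing_thresh and,
 * when available, tracing_max_latency) are read and written in
 * microseconds while the value itself is stored in nanoseconds. Writing
 * "500" stores 500 * 1000 = 500000 ns; reading it back prints "500"
 * again via nsecs_to_usecs(). A stored value of (unsigned long)-1 is
 * printed as "-1".
 */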
6418
6419 static ssize_t
6420 tracing_thresh_read(struct file *filp, char __user *ubuf,
6421                     size_t cnt, loff_t *ppos)
6422 {
6423         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6424 }
6425
6426 static ssize_t
6427 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6428                      size_t cnt, loff_t *ppos)
6429 {
6430         struct trace_array *tr = filp->private_data;
6431         int ret;
6432
6433         mutex_lock(&trace_types_lock);
6434         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6435         if (ret < 0)
6436                 goto out;
6437
6438         if (tr->current_trace->update_thresh) {
6439                 ret = tr->current_trace->update_thresh(tr);
6440                 if (ret < 0)
6441                         goto out;
6442         }
6443
6444         ret = cnt;
6445 out:
6446         mutex_unlock(&trace_types_lock);
6447
6448         return ret;
6449 }
6450
6451 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6452
6453 static ssize_t
6454 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6455                      size_t cnt, loff_t *ppos)
6456 {
6457         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6458 }
6459
6460 static ssize_t
6461 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6462                       size_t cnt, loff_t *ppos)
6463 {
6464         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6465 }
6466
6467 #endif
6468
6469 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6470 {
6471         struct trace_array *tr = inode->i_private;
6472         struct trace_iterator *iter;
6473         int ret;
6474
6475         ret = tracing_check_open_get_tr(tr);
6476         if (ret)
6477                 return ret;
6478
6479         mutex_lock(&trace_types_lock);
6480
6481         /* create a buffer to store the information to pass to userspace */
6482         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6483         if (!iter) {
6484                 ret = -ENOMEM;
6485                 __trace_array_put(tr);
6486                 goto out;
6487         }
6488
6489         trace_seq_init(&iter->seq);
6490         iter->trace = tr->current_trace;
6491
6492         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6493                 ret = -ENOMEM;
6494                 goto fail;
6495         }
6496
6497         /* trace pipe does not show start of buffer */
6498         cpumask_setall(iter->started);
6499
6500         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6501                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6502
6503         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6504         if (trace_clocks[tr->clock_id].in_ns)
6505                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6506
6507         iter->tr = tr;
6508         iter->array_buffer = &tr->array_buffer;
6509         iter->cpu_file = tracing_get_cpu(inode);
6510         mutex_init(&iter->mutex);
6511         filp->private_data = iter;
6512
6513         if (iter->trace->pipe_open)
6514                 iter->trace->pipe_open(iter);
6515
6516         nonseekable_open(inode, filp);
6517
6518         tr->trace_ref++;
6519 out:
6520         mutex_unlock(&trace_types_lock);
6521         return ret;
6522
6523 fail:
6524         kfree(iter);
6525         __trace_array_put(tr);
6526         mutex_unlock(&trace_types_lock);
6527         return ret;
6528 }
6529
6530 static int tracing_release_pipe(struct inode *inode, struct file *file)
6531 {
6532         struct trace_iterator *iter = file->private_data;
6533         struct trace_array *tr = inode->i_private;
6534
6535         mutex_lock(&trace_types_lock);
6536
6537         tr->trace_ref--;
6538
6539         if (iter->trace->pipe_close)
6540                 iter->trace->pipe_close(iter);
6541
6542         mutex_unlock(&trace_types_lock);
6543
6544         free_cpumask_var(iter->started);
6545         mutex_destroy(&iter->mutex);
6546         kfree(iter);
6547
6548         trace_array_put(tr);
6549
6550         return 0;
6551 }
6552
6553 static __poll_t
6554 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6555 {
6556         struct trace_array *tr = iter->tr;
6557
6558         /* Iterators are static; they should be either filled or empty */
6559         if (trace_buffer_iter(iter, iter->cpu_file))
6560                 return EPOLLIN | EPOLLRDNORM;
6561
6562         if (tr->trace_flags & TRACE_ITER_BLOCK)
6563                 /*
6564                  * Always select as readable when in blocking mode
6565                  */
6566                 return EPOLLIN | EPOLLRDNORM;
6567         else
6568                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6569                                              filp, poll_table);
6570 }
6571
6572 static __poll_t
6573 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6574 {
6575         struct trace_iterator *iter = filp->private_data;
6576
6577         return trace_poll(iter, filp, poll_table);
6578 }
6579
6580 /* Must be called with iter->mutex held. */
6581 static int tracing_wait_pipe(struct file *filp)
6582 {
6583         struct trace_iterator *iter = filp->private_data;
6584         int ret;
6585
6586         while (trace_empty(iter)) {
6587
6588                 if ((filp->f_flags & O_NONBLOCK)) {
6589                         return -EAGAIN;
6590                 }
6591
6592                 /*
6593                  * We block until we have read something and tracing is
6594                  * disabled. We still block if tracing is disabled but we
6595                  * have never read anything. This allows a user to cat this
6596                  * file and then enable tracing. But once we have read
6597                  * something, we give an EOF when tracing is disabled again.
6598                  *
6599                  * iter->pos will be 0 if we haven't read anything.
6600                  */
6601                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6602                         break;
6603
6604                 mutex_unlock(&iter->mutex);
6605
6606                 ret = wait_on_pipe(iter, 0);
6607
6608                 mutex_lock(&iter->mutex);
6609
6610                 if (ret)
6611                         return ret;
6612         }
6613
6614         return 1;
6615 }
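
/*
 * Example (an illustrative user-space sketch; the path assumes tracefs at
 * /sys/kernel/tracing): a reader that opens trace_pipe with O_NONBLOCK
 * gets -EAGAIN from the check above instead of sleeping in wait_on_pipe()
 * while the buffer is empty.
 *
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
 *	char buf[4096];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *	// n < 0 with errno == EAGAIN when there is no trace data yet
 */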
6616
6617 /*
6618  * Consumer reader.
6619  */
6620 static ssize_t
6621 tracing_read_pipe(struct file *filp, char __user *ubuf,
6622                   size_t cnt, loff_t *ppos)
6623 {
6624         struct trace_iterator *iter = filp->private_data;
6625         ssize_t sret;
6626
6627         /*
6628          * Avoid more than one consumer on a single file descriptor.
6629          * This is just a matter of trace coherency; the ring buffer itself
6630          * is protected.
6631          */
6632         mutex_lock(&iter->mutex);
6633
6634         /* return any leftover data */
6635         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6636         if (sret != -EBUSY)
6637                 goto out;
6638
6639         trace_seq_init(&iter->seq);
6640
6641         if (iter->trace->read) {
6642                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6643                 if (sret)
6644                         goto out;
6645         }
6646
6647 waitagain:
6648         sret = tracing_wait_pipe(filp);
6649         if (sret <= 0)
6650                 goto out;
6651
6652         /* stop when tracing is finished */
6653         if (trace_empty(iter)) {
6654                 sret = 0;
6655                 goto out;
6656         }
6657
6658         if (cnt >= PAGE_SIZE)
6659                 cnt = PAGE_SIZE - 1;
6660
6661         /* reset all but tr, trace, and overruns */
6662         memset(&iter->seq, 0,
6663                sizeof(struct trace_iterator) -
6664                offsetof(struct trace_iterator, seq));
6665         cpumask_clear(iter->started);
6666         trace_seq_init(&iter->seq);
6667         iter->pos = -1;
6668
6669         trace_event_read_lock();
6670         trace_access_lock(iter->cpu_file);
6671         while (trace_find_next_entry_inc(iter) != NULL) {
6672                 enum print_line_t ret;
6673                 int save_len = iter->seq.seq.len;
6674
6675                 ret = print_trace_line(iter);
6676                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6677                         /* don't print partial lines */
6678                         iter->seq.seq.len = save_len;
6679                         break;
6680                 }
6681                 if (ret != TRACE_TYPE_NO_CONSUME)
6682                         trace_consume(iter);
6683
6684                 if (trace_seq_used(&iter->seq) >= cnt)
6685                         break;
6686
6687                 /*
6688                  * Setting the full flag means we reached the trace_seq buffer
6689                  * size and should have left via the partial-line condition above.
6690                  * One of the trace_seq_* functions is not being used properly.
6691                  */
6692                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6693                           iter->ent->type);
6694         }
6695         trace_access_unlock(iter->cpu_file);
6696         trace_event_read_unlock();
6697
6698         /* Now copy what we have to the user */
6699         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6700         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6701                 trace_seq_init(&iter->seq);
6702
6703         /*
6704          * If there was nothing to send to user, in spite of consuming trace
6705          * entries, go back to wait for more entries.
6706          */
6707         if (sret == -EBUSY)
6708                 goto waitagain;
6709
6710 out:
6711         mutex_unlock(&iter->mutex);
6712
6713         return sret;
6714 }
6715
6716 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6717                                      unsigned int idx)
6718 {
6719         __free_page(spd->pages[idx]);
6720 }
6721
6722 static size_t
6723 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6724 {
6725         size_t count;
6726         int save_len;
6727         int ret;
6728
6729         /* Seq buffer is page-sized, exactly what we need. */
6730         for (;;) {
6731                 save_len = iter->seq.seq.len;
6732                 ret = print_trace_line(iter);
6733
6734                 if (trace_seq_has_overflowed(&iter->seq)) {
6735                         iter->seq.seq.len = save_len;
6736                         break;
6737                 }
6738
6739                 /*
6740                  * This should not be hit, because it should only
6741                  * be set if the iter->seq overflowed. But check it
6742                  * anyway to be safe.
6743                  */
6744                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6745                         iter->seq.seq.len = save_len;
6746                         break;
6747                 }
6748
6749                 count = trace_seq_used(&iter->seq) - save_len;
6750                 if (rem < count) {
6751                         rem = 0;
6752                         iter->seq.seq.len = save_len;
6753                         break;
6754                 }
6755
6756                 if (ret != TRACE_TYPE_NO_CONSUME)
6757                         trace_consume(iter);
6758                 rem -= count;
6759                 if (!trace_find_next_entry_inc(iter))   {
6760                         rem = 0;
6761                         iter->ent = NULL;
6762                         break;
6763                 }
6764         }
6765
6766         return rem;
6767 }
6768
6769 static ssize_t tracing_splice_read_pipe(struct file *filp,
6770                                         loff_t *ppos,
6771                                         struct pipe_inode_info *pipe,
6772                                         size_t len,
6773                                         unsigned int flags)
6774 {
6775         struct page *pages_def[PIPE_DEF_BUFFERS];
6776         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6777         struct trace_iterator *iter = filp->private_data;
6778         struct splice_pipe_desc spd = {
6779                 .pages          = pages_def,
6780                 .partial        = partial_def,
6781                 .nr_pages       = 0, /* This gets updated below. */
6782                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6783                 .ops            = &default_pipe_buf_ops,
6784                 .spd_release    = tracing_spd_release_pipe,
6785         };
6786         ssize_t ret;
6787         size_t rem;
6788         unsigned int i;
6789
6790         if (splice_grow_spd(pipe, &spd))
6791                 return -ENOMEM;
6792
6793         mutex_lock(&iter->mutex);
6794
6795         if (iter->trace->splice_read) {
6796                 ret = iter->trace->splice_read(iter, filp,
6797                                                ppos, pipe, len, flags);
6798                 if (ret)
6799                         goto out_err;
6800         }
6801
6802         ret = tracing_wait_pipe(filp);
6803         if (ret <= 0)
6804                 goto out_err;
6805
6806         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6807                 ret = -EFAULT;
6808                 goto out_err;
6809         }
6810
6811         trace_event_read_lock();
6812         trace_access_lock(iter->cpu_file);
6813
6814         /* Fill as many pages as possible. */
6815         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6816                 spd.pages[i] = alloc_page(GFP_KERNEL);
6817                 if (!spd.pages[i])
6818                         break;
6819
6820                 rem = tracing_fill_pipe_page(rem, iter);
6821
6822                 /* Copy the data into the page, so we can start over. */
6823                 ret = trace_seq_to_buffer(&iter->seq,
6824                                           page_address(spd.pages[i]),
6825                                           trace_seq_used(&iter->seq));
6826                 if (ret < 0) {
6827                         __free_page(spd.pages[i]);
6828                         break;
6829                 }
6830                 spd.partial[i].offset = 0;
6831                 spd.partial[i].len = trace_seq_used(&iter->seq);
6832
6833                 trace_seq_init(&iter->seq);
6834         }
6835
6836         trace_access_unlock(iter->cpu_file);
6837         trace_event_read_unlock();
6838         mutex_unlock(&iter->mutex);
6839
6840         spd.nr_pages = i;
6841
6842         if (i)
6843                 ret = splice_to_pipe(pipe, &spd);
6844         else
6845                 ret = 0;
6846 out:
6847         splice_shrink_spd(&spd);
6848         return ret;
6849
6850 out_err:
6851         mutex_unlock(&iter->mutex);
6852         goto out;
6853 }
6854
6855 static ssize_t
6856 tracing_entries_read(struct file *filp, char __user *ubuf,
6857                      size_t cnt, loff_t *ppos)
6858 {
6859         struct inode *inode = file_inode(filp);
6860         struct trace_array *tr = inode->i_private;
6861         int cpu = tracing_get_cpu(inode);
6862         char buf[64];
6863         int r = 0;
6864         ssize_t ret;
6865
6866         mutex_lock(&trace_types_lock);
6867
6868         if (cpu == RING_BUFFER_ALL_CPUS) {
6869                 int cpu, buf_size_same;
6870                 unsigned long size;
6871
6872                 size = 0;
6873                 buf_size_same = 1;
6874                 /* check if all cpu sizes are the same */
6875                 for_each_tracing_cpu(cpu) {
6876                         /* fill in the size from the first enabled cpu */
6877                         if (size == 0)
6878                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6879                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6880                                 buf_size_same = 0;
6881                                 break;
6882                         }
6883                 }
6884
6885                 if (buf_size_same) {
6886                         if (!ring_buffer_expanded)
6887                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6888                                             size >> 10,
6889                                             trace_buf_size >> 10);
6890                         else
6891                                 r = sprintf(buf, "%lu\n", size >> 10);
6892                 } else
6893                         r = sprintf(buf, "X\n");
6894         } else
6895                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6896
6897         mutex_unlock(&trace_types_lock);
6898
6899         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6900         return ret;
6901 }
6902
6903 static ssize_t
6904 tracing_entries_write(struct file *filp, const char __user *ubuf,
6905                       size_t cnt, loff_t *ppos)
6906 {
6907         struct inode *inode = file_inode(filp);
6908         struct trace_array *tr = inode->i_private;
6909         unsigned long val;
6910         int ret;
6911
6912         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6913         if (ret)
6914                 return ret;
6915
6916         /* must have at least 1 entry */
6917         if (!val)
6918                 return -EINVAL;
6919
6920         /* value is in KB */
6921         val <<= 10;
6922         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6923         if (ret < 0)
6924                 return ret;
6925
6926         *ppos += cnt;
6927
6928         return cnt;
6929 }
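
/*
 * Worked example: the value written to buffer_size_kb is in kilobytes
 * per CPU, so "1408" becomes 1408 << 10 = 1441792 bytes, which is the
 * size requested from tracing_resize_ring_buffer() for every CPU (or for
 * one CPU when the per_cpu/cpuN/buffer_size_kb variant of the file is
 * written).
 */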
6930
6931 static ssize_t
6932 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6933                                 size_t cnt, loff_t *ppos)
6934 {
6935         struct trace_array *tr = filp->private_data;
6936         char buf[64];
6937         int r, cpu;
6938         unsigned long size = 0, expanded_size = 0;
6939
6940         mutex_lock(&trace_types_lock);
6941         for_each_tracing_cpu(cpu) {
6942                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6943                 if (!ring_buffer_expanded)
6944                         expanded_size += trace_buf_size >> 10;
6945         }
6946         if (ring_buffer_expanded)
6947                 r = sprintf(buf, "%lu\n", size);
6948         else
6949                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6950         mutex_unlock(&trace_types_lock);
6951
6952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6953 }
6954
6955 static ssize_t
6956 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6957                           size_t cnt, loff_t *ppos)
6958 {
6959         /*
6960          * There is no need to read what the user has written; this function
6961          * exists only so that an "echo" into this file does not report an error.
6962          */
6963
6964         *ppos += cnt;
6965
6966         return cnt;
6967 }
6968
6969 static int
6970 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6971 {
6972         struct trace_array *tr = inode->i_private;
6973
6974         /* disable tracing? */
6975         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6976                 tracer_tracing_off(tr);
6977         /* resize the ring buffer to 0 */
6978         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6979
6980         trace_array_put(tr);
6981
6982         return 0;
6983 }
6984
6985 static ssize_t
6986 tracing_mark_write(struct file *filp, const char __user *ubuf,
6987                                         size_t cnt, loff_t *fpos)
6988 {
6989         struct trace_array *tr = filp->private_data;
6990         struct ring_buffer_event *event;
6991         enum event_trigger_type tt = ETT_NONE;
6992         struct trace_buffer *buffer;
6993         struct print_entry *entry;
6994         ssize_t written;
6995         int size;
6996         int len;
6997
6998 /* Used in tracing_mark_raw_write() as well */
6999 #define FAULTED_STR "<faulted>"
7000 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7001
7002         if (tracing_disabled)
7003                 return -EINVAL;
7004
7005         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7006                 return -EINVAL;
7007
7008         if (cnt > TRACE_BUF_SIZE)
7009                 cnt = TRACE_BUF_SIZE;
7010
7011         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7012
7013         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7014
7015         /* If less than "<faulted>", then make sure we can still add that */
7016         if (cnt < FAULTED_SIZE)
7017                 size += FAULTED_SIZE - cnt;
7018
7019         buffer = tr->array_buffer.buffer;
7020         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7021                                             tracing_gen_ctx());
7022         if (unlikely(!event))
7023                 /* Ring buffer disabled, return as if not open for write */
7024                 return -EBADF;
7025
7026         entry = ring_buffer_event_data(event);
7027         entry->ip = _THIS_IP_;
7028
7029         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7030         if (len) {
7031                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7032                 cnt = FAULTED_SIZE;
7033                 written = -EFAULT;
7034         } else
7035                 written = cnt;
7036
7037         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7038                 /* do not add \n before testing triggers, but add \0 */
7039                 entry->buf[cnt] = '\0';
7040                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7041         }
7042
7043         if (entry->buf[cnt - 1] != '\n') {
7044                 entry->buf[cnt] = '\n';
7045                 entry->buf[cnt + 1] = '\0';
7046         } else
7047                 entry->buf[cnt] = '\0';
7048
7049         if (static_branch_unlikely(&trace_marker_exports_enabled))
7050                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7051         __buffer_unlock_commit(buffer, event);
7052
7053         if (tt)
7054                 event_triggers_post_call(tr->trace_marker_file, tt);
7055
7056         if (written > 0)
7057                 *fpos += written;
7058
7059         return written;
7060 }
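
/*
 * Example (an illustrative user-space sketch; assumes tracefs at
 * /sys/kernel/tracing): each write to trace_marker becomes one
 * TRACE_PRINT event, and a newline is appended when the caller did not
 * supply one.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void trace_mark(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, msg, strlen(msg));
 *			close(fd);
 *		}
 *	}
 */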
7061
7062 /* Limit it for now to 3K (including tag) */
7063 #define RAW_DATA_MAX_SIZE (1024*3)
7064
7065 static ssize_t
7066 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7067                                         size_t cnt, loff_t *fpos)
7068 {
7069         struct trace_array *tr = filp->private_data;
7070         struct ring_buffer_event *event;
7071         struct trace_buffer *buffer;
7072         struct raw_data_entry *entry;
7073         ssize_t written;
7074         int size;
7075         int len;
7076
7077 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7078
7079         if (tracing_disabled)
7080                 return -EINVAL;
7081
7082         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7083                 return -EINVAL;
7084
7085         /* The marker must at least have a tag id */
7086         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7087                 return -EINVAL;
7088
7089         if (cnt > TRACE_BUF_SIZE)
7090                 cnt = TRACE_BUF_SIZE;
7091
7092         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7093
7094         size = sizeof(*entry) + cnt;
7095         if (cnt < FAULT_SIZE_ID)
7096                 size += FAULT_SIZE_ID - cnt;
7097
7098         buffer = tr->array_buffer.buffer;
7099         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7100                                             tracing_gen_ctx());
7101         if (!event)
7102                 /* Ring buffer disabled, return as if not open for write */
7103                 return -EBADF;
7104
7105         entry = ring_buffer_event_data(event);
7106
7107         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7108         if (len) {
7109                 entry->id = -1;
7110                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7111                 written = -EFAULT;
7112         } else
7113                 written = cnt;
7114
7115         __buffer_unlock_commit(buffer, event);
7116
7117         if (written > 0)
7118                 *fpos += written;
7119
7120         return written;
7121 }
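
/*
 * Example (an illustrative sketch of the expected payload; the struct and
 * values are hypothetical): a write must carry at least a leading
 * unsigned int tag id, with at most RAW_DATA_MAX_SIZE bytes in total; the
 * id lands in raw_data_entry::id and the rest in raw_data_entry::buf.
 *
 *	struct my_raw_marker {
 *		unsigned int id;	// tag id, here 42
 *		char payload[32];	// opaque data
 *	} m = { .id = 42, .payload = "raw data" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &m, sizeof(m));
 *	close(fd);
 */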
7122
7123 static int tracing_clock_show(struct seq_file *m, void *v)
7124 {
7125         struct trace_array *tr = m->private;
7126         int i;
7127
7128         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7129                 seq_printf(m,
7130                         "%s%s%s%s", i ? " " : "",
7131                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7132                         i == tr->clock_id ? "]" : "");
7133         seq_putc(m, '\n');
7134
7135         return 0;
7136 }
7137
7138 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7139 {
7140         int i;
7141
7142         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7143                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7144                         break;
7145         }
7146         if (i == ARRAY_SIZE(trace_clocks))
7147                 return -EINVAL;
7148
7149         mutex_lock(&trace_types_lock);
7150
7151         tr->clock_id = i;
7152
7153         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7154
7155         /*
7156          * New clock may not be consistent with the previous clock.
7157          * Reset the buffer so that it doesn't have incomparable timestamps.
7158          */
7159         tracing_reset_online_cpus(&tr->array_buffer);
7160
7161 #ifdef CONFIG_TRACER_MAX_TRACE
7162         if (tr->max_buffer.buffer)
7163                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7164         tracing_reset_online_cpus(&tr->max_buffer);
7165 #endif
7166
7167         mutex_unlock(&trace_types_lock);
7168
7169         return 0;
7170 }
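
/*
 * Example (an illustrative in-kernel sketch; the trace_array pointer "tr"
 * is whatever instance the caller already holds): the clock name must
 * match one of trace_clocks[], e.g. "global" or "mono", and switching
 * clocks discards buffered events because the buffers are reset.
 *
 *	int err = tracing_set_clock(tr, "global");
 *	if (err == -EINVAL)
 *		pr_warn("unknown trace clock\n");
 */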
7171
7172 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7173                                    size_t cnt, loff_t *fpos)
7174 {
7175         struct seq_file *m = filp->private_data;
7176         struct trace_array *tr = m->private;
7177         char buf[64];
7178         const char *clockstr;
7179         int ret;
7180
7181         if (cnt >= sizeof(buf))
7182                 return -EINVAL;
7183
7184         if (copy_from_user(buf, ubuf, cnt))
7185                 return -EFAULT;
7186
7187         buf[cnt] = 0;
7188
7189         clockstr = strstrip(buf);
7190
7191         ret = tracing_set_clock(tr, clockstr);
7192         if (ret)
7193                 return ret;
7194
7195         *fpos += cnt;
7196
7197         return cnt;
7198 }
7199
7200 static int tracing_clock_open(struct inode *inode, struct file *file)
7201 {
7202         struct trace_array *tr = inode->i_private;
7203         int ret;
7204
7205         ret = tracing_check_open_get_tr(tr);
7206         if (ret)
7207                 return ret;
7208
7209         ret = single_open(file, tracing_clock_show, inode->i_private);
7210         if (ret < 0)
7211                 trace_array_put(tr);
7212
7213         return ret;
7214 }
7215
7216 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7217 {
7218         struct trace_array *tr = m->private;
7219
7220         mutex_lock(&trace_types_lock);
7221
7222         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7223                 seq_puts(m, "delta [absolute]\n");
7224         else
7225                 seq_puts(m, "[delta] absolute\n");
7226
7227         mutex_unlock(&trace_types_lock);
7228
7229         return 0;
7230 }
7231
7232 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7233 {
7234         struct trace_array *tr = inode->i_private;
7235         int ret;
7236
7237         ret = tracing_check_open_get_tr(tr);
7238         if (ret)
7239                 return ret;
7240
7241         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7242         if (ret < 0)
7243                 trace_array_put(tr);
7244
7245         return ret;
7246 }
7247
7248 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7249 {
7250         if (rbe == this_cpu_read(trace_buffered_event))
7251                 return ring_buffer_time_stamp(buffer);
7252
7253         return ring_buffer_event_time_stamp(buffer, rbe);
7254 }
7255
7256 /*
7257  * Set or disable using the per CPU trace_buffered_event when possible.
7258  */
7259 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7260 {
7261         int ret = 0;
7262
7263         mutex_lock(&trace_types_lock);
7264
7265         if (set && tr->no_filter_buffering_ref++)
7266                 goto out;
7267
7268         if (!set) {
7269                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7270                         ret = -EINVAL;
7271                         goto out;
7272                 }
7273
7274                 --tr->no_filter_buffering_ref;
7275         }
7276  out:
7277         mutex_unlock(&trace_types_lock);
7278
7279         return ret;
7280 }
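
/*
 * Example: the helper above is reference counted, so users must pair
 * their calls (an illustrative sequence; "tr" is whatever instance the
 * caller holds):
 *
 *	tracing_set_filter_buffering(tr, true);   // ref 0 -> 1, buffered event not used
 *	tracing_set_filter_buffering(tr, true);   // ref 1 -> 2, no further effect
 *	tracing_set_filter_buffering(tr, false);  // ref 2 -> 1, still not used
 *	tracing_set_filter_buffering(tr, false);  // ref 1 -> 0, buffered event usable again
 */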
7281
7282 struct ftrace_buffer_info {
7283         struct trace_iterator   iter;
7284         void                    *spare;
7285         unsigned int            spare_cpu;
7286         unsigned int            read;
7287 };
7288
7289 #ifdef CONFIG_TRACER_SNAPSHOT
7290 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7291 {
7292         struct trace_array *tr = inode->i_private;
7293         struct trace_iterator *iter;
7294         struct seq_file *m;
7295         int ret;
7296
7297         ret = tracing_check_open_get_tr(tr);
7298         if (ret)
7299                 return ret;
7300
7301         if (file->f_mode & FMODE_READ) {
7302                 iter = __tracing_open(inode, file, true);
7303                 if (IS_ERR(iter))
7304                         ret = PTR_ERR(iter);
7305         } else {
7306                 /* Writes still need the seq_file to hold the private data */
7307                 ret = -ENOMEM;
7308                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7309                 if (!m)
7310                         goto out;
7311                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7312                 if (!iter) {
7313                         kfree(m);
7314                         goto out;
7315                 }
7316                 ret = 0;
7317
7318                 iter->tr = tr;
7319                 iter->array_buffer = &tr->max_buffer;
7320                 iter->cpu_file = tracing_get_cpu(inode);
7321                 m->private = iter;
7322                 file->private_data = m;
7323         }
7324 out:
7325         if (ret < 0)
7326                 trace_array_put(tr);
7327
7328         return ret;
7329 }
7330
7331 static ssize_t
7332 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7333                        loff_t *ppos)
7334 {
7335         struct seq_file *m = filp->private_data;
7336         struct trace_iterator *iter = m->private;
7337         struct trace_array *tr = iter->tr;
7338         unsigned long val;
7339         int ret;
7340
7341         ret = tracing_update_buffers();
7342         if (ret < 0)
7343                 return ret;
7344
7345         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7346         if (ret)
7347                 return ret;
7348
7349         mutex_lock(&trace_types_lock);
7350
7351         if (tr->current_trace->use_max_tr) {
7352                 ret = -EBUSY;
7353                 goto out;
7354         }
7355
7356         arch_spin_lock(&tr->max_lock);
7357         if (tr->cond_snapshot)
7358                 ret = -EBUSY;
7359         arch_spin_unlock(&tr->max_lock);
7360         if (ret)
7361                 goto out;
7362
7363         switch (val) {
7364         case 0:
7365                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7366                         ret = -EINVAL;
7367                         break;
7368                 }
7369                 if (tr->allocated_snapshot)
7370                         free_snapshot(tr);
7371                 break;
7372         case 1:
7373 /* Only allow per-cpu swap if the ring buffer supports it */
7374 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7375                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7376                         ret = -EINVAL;
7377                         break;
7378                 }
7379 #endif
7380                 if (tr->allocated_snapshot)
7381                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7382                                         &tr->array_buffer, iter->cpu_file);
7383                 else
7384                         ret = tracing_alloc_snapshot_instance(tr);
7385                 if (ret < 0)
7386                         break;
7387                 local_irq_disable();
7388                 /* Now, we're going to swap */
7389                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7390                         update_max_tr(tr, current, smp_processor_id(), NULL);
7391                 else
7392                         update_max_tr_single(tr, current, iter->cpu_file);
7393                 local_irq_enable();
7394                 break;
7395         default:
7396                 if (tr->allocated_snapshot) {
7397                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7398                                 tracing_reset_online_cpus(&tr->max_buffer);
7399                         else
7400                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7401                 }
7402                 break;
7403         }
7404
7405         if (ret >= 0) {
7406                 *ppos += cnt;
7407                 ret = cnt;
7408         }
7409 out:
7410         mutex_unlock(&trace_types_lock);
7411         return ret;
7412 }
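
/*
 * Summary of the values accepted by the write handler above: writing "0"
 * frees the snapshot buffer (only valid on the all-CPU snapshot file),
 * writing "1" allocates the buffer if needed and takes a snapshot by
 * swapping with the main buffer (per CPU or all CPUs), and writing any
 * other value simply clears the snapshot buffer's contents.
 */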
7413
7414 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7415 {
7416         struct seq_file *m = file->private_data;
7417         int ret;
7418
7419         ret = tracing_release(inode, file);
7420
7421         if (file->f_mode & FMODE_READ)
7422                 return ret;
7423
7424         /* If write only, the seq_file is just a stub */
7425         if (m)
7426                 kfree(m->private);
7427         kfree(m);
7428
7429         return 0;
7430 }
7431
7432 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7433 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7434                                     size_t count, loff_t *ppos);
7435 static int tracing_buffers_release(struct inode *inode, struct file *file);
7436 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7437                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7438
7439 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7440 {
7441         struct ftrace_buffer_info *info;
7442         int ret;
7443
7444         /* The following checks for tracefs lockdown */
7445         ret = tracing_buffers_open(inode, filp);
7446         if (ret < 0)
7447                 return ret;
7448
7449         info = filp->private_data;
7450
7451         if (info->iter.trace->use_max_tr) {
7452                 tracing_buffers_release(inode, filp);
7453                 return -EBUSY;
7454         }
7455
7456         info->iter.snapshot = true;
7457         info->iter.array_buffer = &info->iter.tr->max_buffer;
7458
7459         return ret;
7460 }
7461
7462 #endif /* CONFIG_TRACER_SNAPSHOT */
7463
7464
7465 static const struct file_operations tracing_thresh_fops = {
7466         .open           = tracing_open_generic,
7467         .read           = tracing_thresh_read,
7468         .write          = tracing_thresh_write,
7469         .llseek         = generic_file_llseek,
7470 };
7471
7472 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7473 static const struct file_operations tracing_max_lat_fops = {
7474         .open           = tracing_open_generic,
7475         .read           = tracing_max_lat_read,
7476         .write          = tracing_max_lat_write,
7477         .llseek         = generic_file_llseek,
7478 };
7479 #endif
7480
7481 static const struct file_operations set_tracer_fops = {
7482         .open           = tracing_open_generic,
7483         .read           = tracing_set_trace_read,
7484         .write          = tracing_set_trace_write,
7485         .llseek         = generic_file_llseek,
7486 };
7487
7488 static const struct file_operations tracing_pipe_fops = {
7489         .open           = tracing_open_pipe,
7490         .poll           = tracing_poll_pipe,
7491         .read           = tracing_read_pipe,
7492         .splice_read    = tracing_splice_read_pipe,
7493         .release        = tracing_release_pipe,
7494         .llseek         = no_llseek,
7495 };
7496
7497 static const struct file_operations tracing_entries_fops = {
7498         .open           = tracing_open_generic_tr,
7499         .read           = tracing_entries_read,
7500         .write          = tracing_entries_write,
7501         .llseek         = generic_file_llseek,
7502         .release        = tracing_release_generic_tr,
7503 };
7504
7505 static const struct file_operations tracing_total_entries_fops = {
7506         .open           = tracing_open_generic_tr,
7507         .read           = tracing_total_entries_read,
7508         .llseek         = generic_file_llseek,
7509         .release        = tracing_release_generic_tr,
7510 };
7511
7512 static const struct file_operations tracing_free_buffer_fops = {
7513         .open           = tracing_open_generic_tr,
7514         .write          = tracing_free_buffer_write,
7515         .release        = tracing_free_buffer_release,
7516 };
7517
7518 static const struct file_operations tracing_mark_fops = {
7519         .open           = tracing_open_generic_tr,
7520         .write          = tracing_mark_write,
7521         .llseek         = generic_file_llseek,
7522         .release        = tracing_release_generic_tr,
7523 };
7524
7525 static const struct file_operations tracing_mark_raw_fops = {
7526         .open           = tracing_open_generic_tr,
7527         .write          = tracing_mark_raw_write,
7528         .llseek         = generic_file_llseek,
7529         .release        = tracing_release_generic_tr,
7530 };
7531
7532 static const struct file_operations trace_clock_fops = {
7533         .open           = tracing_clock_open,
7534         .read           = seq_read,
7535         .llseek         = seq_lseek,
7536         .release        = tracing_single_release_tr,
7537         .write          = tracing_clock_write,
7538 };
7539
7540 static const struct file_operations trace_time_stamp_mode_fops = {
7541         .open           = tracing_time_stamp_mode_open,
7542         .read           = seq_read,
7543         .llseek         = seq_lseek,
7544         .release        = tracing_single_release_tr,
7545 };
7546
7547 #ifdef CONFIG_TRACER_SNAPSHOT
7548 static const struct file_operations snapshot_fops = {
7549         .open           = tracing_snapshot_open,
7550         .read           = seq_read,
7551         .write          = tracing_snapshot_write,
7552         .llseek         = tracing_lseek,
7553         .release        = tracing_snapshot_release,
7554 };
7555
7556 static const struct file_operations snapshot_raw_fops = {
7557         .open           = snapshot_raw_open,
7558         .read           = tracing_buffers_read,
7559         .release        = tracing_buffers_release,
7560         .splice_read    = tracing_buffers_splice_read,
7561         .llseek         = no_llseek,
7562 };
7563
7564 #endif /* CONFIG_TRACER_SNAPSHOT */
7565
7566 #define TRACING_LOG_ERRS_MAX    8
7567 #define TRACING_LOG_LOC_MAX     128
7568
7569 #define CMD_PREFIX "  Command: "
7570
7571 struct err_info {
7572         const char      **errs; /* ptr to loc-specific array of err strings */
7573         u8              type;   /* index into errs -> specific err string */
7574         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7575         u64             ts;
7576 };
7577
7578 struct tracing_log_err {
7579         struct list_head        list;
7580         struct err_info         info;
7581         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7582         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7583 };
7584
7585 static DEFINE_MUTEX(tracing_err_log_lock);
7586
7587 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7588 {
7589         struct tracing_log_err *err;
7590
7591         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7592                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7593                 if (!err)
7594                         err = ERR_PTR(-ENOMEM);
7595                 tr->n_err_log_entries++;
7596
7597                 return err;
7598         }
7599
7600         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7601         list_del(&err->list);
7602
7603         return err;
7604 }
7605
7606 /**
7607  * err_pos - find the position of a string within a command for error careting
7608  * @cmd: The tracing command that caused the error
7609  * @str: The string to position the caret at within @cmd
7610  *
7611  * Finds the position of the first occurrence of @str within @cmd.  The
7612  * return value can be passed to tracing_log_err() for caret placement
7613  * within @cmd.
7614  *
7615  * Returns the index within @cmd of the first occurrence of @str or 0
7616  * if @str was not found.
7617  */
7618 unsigned int err_pos(char *cmd, const char *str)
7619 {
7620         char *found;
7621
7622         if (WARN_ON(!strlen(cmd)))
7623                 return 0;
7624
7625         found = strstr(cmd, str);
7626         if (found)
7627                 return found - cmd;
7628
7629         return 0;
7630 }
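
/*
 * Worked example (hypothetical command string):
 *
 *	err_pos("hist:keys=pid", "keys") returns 5, so the caret in the
 *	error log ends up under the 'k' of "keys". If the string is not
 *	found, 0 is returned and the caret sits at the first column.
 */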
7631
7632 /**
7633  * tracing_log_err - write an error to the tracing error log
7634  * @tr: The associated trace array for the error (NULL for top level array)
7635  * @loc: A string describing where the error occurred
7636  * @cmd: The tracing command that caused the error
7637  * @errs: The array of loc-specific static error strings
7638  * @type: The index into errs[], which produces the specific static err string
7639  * @pos: The position the caret should be placed in the cmd
7640  *
7641  * Writes an error into tracing/error_log of the form:
7642  *
7643  * <loc>: error: <text>
7644  *   Command: <cmd>
7645  *              ^
7646  *
7647  * tracing/error_log is a small log file containing the last
7648  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7649  * unless there has been a tracing error, and the error log can be
7650  * cleared and have its memory freed by writing the empty string in
7651  * truncation mode to it, i.e. echo > tracing/error_log.
7652  *
7653  * NOTE: the @errs array along with the @type param are used to
7654  * produce a static error string - this string is not copied and saved
7655  * when the error is logged - only a pointer to it is saved.  See
7656  * existing callers for examples of how static strings are typically
7657  * defined for use with tracing_log_err().
7658  */
7659 void tracing_log_err(struct trace_array *tr,
7660                      const char *loc, const char *cmd,
7661                      const char **errs, u8 type, u8 pos)
7662 {
7663         struct tracing_log_err *err;
7664
7665         if (!tr)
7666                 tr = &global_trace;
7667
7668         mutex_lock(&tracing_err_log_lock);
7669         err = get_tracing_log_err(tr);
7670         if (PTR_ERR(err) == -ENOMEM) {
7671                 mutex_unlock(&tracing_err_log_lock);
7672                 return;
7673         }
7674
7675         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7676         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7677
7678         err->info.errs = errs;
7679         err->info.type = type;
7680         err->info.pos = pos;
7681         err->info.ts = local_clock();
7682
7683         list_add_tail(&err->list, &tr->err_log);
7684         mutex_unlock(&tracing_err_log_lock);
7685 }
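
/*
 * Example (an illustrative sketch of the calling convention; the error
 * array, index and command string are hypothetical, only
 * tracing_log_err() and err_pos() are real):
 *
 *	static const char *my_cmd_errs[] = {
 *		"Duplicate key",	// index 0
 *		"Unknown field",	// index 1
 *	};
 *
 *	// Logs "<loc>: error: Unknown field", then the command and a caret.
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 1,
 *			err_pos(cmd, "bogus_field"));
 */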
7686
7687 static void clear_tracing_err_log(struct trace_array *tr)
7688 {
7689         struct tracing_log_err *err, *next;
7690
7691         mutex_lock(&tracing_err_log_lock);
7692         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7693                 list_del(&err->list);
7694                 kfree(err);
7695         }
7696
7697         tr->n_err_log_entries = 0;
7698         mutex_unlock(&tracing_err_log_lock);
7699 }
7700
7701 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7702 {
7703         struct trace_array *tr = m->private;
7704
7705         mutex_lock(&tracing_err_log_lock);
7706
7707         return seq_list_start(&tr->err_log, *pos);
7708 }
7709
7710 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7711 {
7712         struct trace_array *tr = m->private;
7713
7714         return seq_list_next(v, &tr->err_log, pos);
7715 }
7716
7717 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7718 {
7719         mutex_unlock(&tracing_err_log_lock);
7720 }
7721
7722 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7723 {
7724         u8 i;
7725
7726         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7727                 seq_putc(m, ' ');
7728         for (i = 0; i < pos; i++)
7729                 seq_putc(m, ' ');
7730         seq_puts(m, "^\n");
7731 }
7732
7733 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7734 {
7735         struct tracing_log_err *err = v;
7736
7737         if (err) {
7738                 const char *err_text = err->info.errs[err->info.type];
7739                 u64 sec = err->info.ts;
7740                 u32 nsec;
7741
7742                 nsec = do_div(sec, NSEC_PER_SEC);
7743                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7744                            err->loc, err_text);
7745                 seq_printf(m, "%s", err->cmd);
7746                 tracing_err_log_show_pos(m, err->info.pos);
7747         }
7748
7749         return 0;
7750 }
7751
7752 static const struct seq_operations tracing_err_log_seq_ops = {
7753         .start  = tracing_err_log_seq_start,
7754         .next   = tracing_err_log_seq_next,
7755         .stop   = tracing_err_log_seq_stop,
7756         .show   = tracing_err_log_seq_show
7757 };
7758
7759 static int tracing_err_log_open(struct inode *inode, struct file *file)
7760 {
7761         struct trace_array *tr = inode->i_private;
7762         int ret = 0;
7763
7764         ret = tracing_check_open_get_tr(tr);
7765         if (ret)
7766                 return ret;
7767
7768         /* If this file was opened for write, then erase contents */
7769         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7770                 clear_tracing_err_log(tr);
7771
7772         if (file->f_mode & FMODE_READ) {
7773                 ret = seq_open(file, &tracing_err_log_seq_ops);
7774                 if (!ret) {
7775                         struct seq_file *m = file->private_data;
7776                         m->private = tr;
7777                 } else {
7778                         trace_array_put(tr);
7779                 }
7780         }
7781         return ret;
7782 }
7783
7784 static ssize_t tracing_err_log_write(struct file *file,
7785                                      const char __user *buffer,
7786                                      size_t count, loff_t *ppos)
7787 {
7788         return count;
7789 }
7790
7791 static int tracing_err_log_release(struct inode *inode, struct file *file)
7792 {
7793         struct trace_array *tr = inode->i_private;
7794
7795         trace_array_put(tr);
7796
7797         if (file->f_mode & FMODE_READ)
7798                 seq_release(inode, file);
7799
7800         return 0;
7801 }
7802
7803 static const struct file_operations tracing_err_log_fops = {
7804         .open           = tracing_err_log_open,
7805         .write          = tracing_err_log_write,
7806         .read           = seq_read,
7807         .llseek         = seq_lseek,
7808         .release        = tracing_err_log_release,
7809 };
7810
7811 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7812 {
7813         struct trace_array *tr = inode->i_private;
7814         struct ftrace_buffer_info *info;
7815         int ret;
7816
7817         ret = tracing_check_open_get_tr(tr);
7818         if (ret)
7819                 return ret;
7820
7821         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7822         if (!info) {
7823                 trace_array_put(tr);
7824                 return -ENOMEM;
7825         }
7826
7827         mutex_lock(&trace_types_lock);
7828
7829         info->iter.tr           = tr;
7830         info->iter.cpu_file     = tracing_get_cpu(inode);
7831         info->iter.trace        = tr->current_trace;
7832         info->iter.array_buffer = &tr->array_buffer;
7833         info->spare             = NULL;
7834         /* Force reading ring buffer for first read */
7835         info->read              = (unsigned int)-1;
7836
7837         filp->private_data = info;
7838
7839         tr->trace_ref++;
7840
7841         mutex_unlock(&trace_types_lock);
7842
7843         ret = nonseekable_open(inode, filp);
7844         if (ret < 0)
7845                 trace_array_put(tr);
7846
7847         return ret;
7848 }
7849
7850 static __poll_t
7851 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7852 {
7853         struct ftrace_buffer_info *info = filp->private_data;
7854         struct trace_iterator *iter = &info->iter;
7855
7856         return trace_poll(iter, filp, poll_table);
7857 }
7858
7859 static ssize_t
7860 tracing_buffers_read(struct file *filp, char __user *ubuf,
7861                      size_t count, loff_t *ppos)
7862 {
7863         struct ftrace_buffer_info *info = filp->private_data;
7864         struct trace_iterator *iter = &info->iter;
7865         ssize_t ret = 0;
7866         ssize_t size;
7867
7868         if (!count)
7869                 return 0;
7870
7871 #ifdef CONFIG_TRACER_MAX_TRACE
7872         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7873                 return -EBUSY;
7874 #endif
7875
7876         if (!info->spare) {
7877                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7878                                                           iter->cpu_file);
7879                 if (IS_ERR(info->spare)) {
7880                         ret = PTR_ERR(info->spare);
7881                         info->spare = NULL;
7882                 } else {
7883                         info->spare_cpu = iter->cpu_file;
7884                 }
7885         }
7886         if (!info->spare)
7887                 return ret;
7888
7889         /* Do we have previous read data to read? */
7890         if (info->read < PAGE_SIZE)
7891                 goto read;
7892
7893  again:
7894         trace_access_lock(iter->cpu_file);
7895         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7896                                     &info->spare,
7897                                     count,
7898                                     iter->cpu_file, 0);
7899         trace_access_unlock(iter->cpu_file);
7900
7901         if (ret < 0) {
7902                 if (trace_empty(iter)) {
7903                         if ((filp->f_flags & O_NONBLOCK))
7904                                 return -EAGAIN;
7905
7906                         ret = wait_on_pipe(iter, 0);
7907                         if (ret)
7908                                 return ret;
7909
7910                         goto again;
7911                 }
7912                 return 0;
7913         }
7914
7915         info->read = 0;
7916  read:
7917         size = PAGE_SIZE - info->read;
7918         if (size > count)
7919                 size = count;
7920
7921         ret = copy_to_user(ubuf, info->spare + info->read, size);
7922         if (ret == size)
7923                 return -EFAULT;
7924
7925         size -= ret;
7926
7927         *ppos += size;
7928         info->read += size;
7929
7930         return size;
7931 }
7932
7933 static int tracing_buffers_release(struct inode *inode, struct file *file)
7934 {
7935         struct ftrace_buffer_info *info = file->private_data;
7936         struct trace_iterator *iter = &info->iter;
7937
7938         mutex_lock(&trace_types_lock);
7939
7940         iter->tr->trace_ref--;
7941
7942         __trace_array_put(iter->tr);
7943
7944         if (info->spare)
7945                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7946                                            info->spare_cpu, info->spare);
7947         kvfree(info);
7948
7949         mutex_unlock(&trace_types_lock);
7950
7951         return 0;
7952 }
7953
7954 struct buffer_ref {
7955         struct trace_buffer     *buffer;
7956         void                    *page;
7957         int                     cpu;
7958         refcount_t              refcount;
7959 };
7960
7961 static void buffer_ref_release(struct buffer_ref *ref)
7962 {
7963         if (!refcount_dec_and_test(&ref->refcount))
7964                 return;
7965         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7966         kfree(ref);
7967 }
7968
7969 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7970                                     struct pipe_buffer *buf)
7971 {
7972         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7973
7974         buffer_ref_release(ref);
7975         buf->private = 0;
7976 }
7977
7978 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7979                                 struct pipe_buffer *buf)
7980 {
7981         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7982
7983         if (refcount_read(&ref->refcount) > INT_MAX/2)
7984                 return false;
7985
7986         refcount_inc(&ref->refcount);
7987         return true;
7988 }
7989
7990 /* Pipe buffer operations for a ring-buffer page. */
7991 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7992         .release                = buffer_pipe_buf_release,
7993         .get                    = buffer_pipe_buf_get,
7994 };
7995
7996 /*
7997  * Callback from splice_to_pipe(); release any pages still attached to
7998  * the spd if we errored out while filling the pipe.
7999  */
8000 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8001 {
8002         struct buffer_ref *ref =
8003                 (struct buffer_ref *)spd->partial[i].private;
8004
8005         buffer_ref_release(ref);
8006         spd->partial[i].private = 0;
8007 }
8008
8009 static ssize_t
8010 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8011                             struct pipe_inode_info *pipe, size_t len,
8012                             unsigned int flags)
8013 {
8014         struct ftrace_buffer_info *info = file->private_data;
8015         struct trace_iterator *iter = &info->iter;
8016         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8017         struct page *pages_def[PIPE_DEF_BUFFERS];
8018         struct splice_pipe_desc spd = {
8019                 .pages          = pages_def,
8020                 .partial        = partial_def,
8021                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8022                 .ops            = &buffer_pipe_buf_ops,
8023                 .spd_release    = buffer_spd_release,
8024         };
8025         struct buffer_ref *ref;
8026         int entries, i;
8027         ssize_t ret = 0;
8028
8029 #ifdef CONFIG_TRACER_MAX_TRACE
8030         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8031                 return -EBUSY;
8032 #endif
8033
8034         if (*ppos & (PAGE_SIZE - 1))
8035                 return -EINVAL;
8036
8037         if (len & (PAGE_SIZE - 1)) {
8038                 if (len < PAGE_SIZE)
8039                         return -EINVAL;
8040                 len &= PAGE_MASK;
8041         }
8042
8043         if (splice_grow_spd(pipe, &spd))
8044                 return -ENOMEM;
8045
8046  again:
8047         trace_access_lock(iter->cpu_file);
8048         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8049
8050         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8051                 struct page *page;
8052                 int r;
8053
8054                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8055                 if (!ref) {
8056                         ret = -ENOMEM;
8057                         break;
8058                 }
8059
8060                 refcount_set(&ref->refcount, 1);
8061                 ref->buffer = iter->array_buffer->buffer;
8062                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8063                 if (IS_ERR(ref->page)) {
8064                         ret = PTR_ERR(ref->page);
8065                         ref->page = NULL;
8066                         kfree(ref);
8067                         break;
8068                 }
8069                 ref->cpu = iter->cpu_file;
8070
8071                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8072                                           len, iter->cpu_file, 1);
8073                 if (r < 0) {
8074                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8075                                                    ref->page);
8076                         kfree(ref);
8077                         break;
8078                 }
8079
8080                 page = virt_to_page(ref->page);
8081
8082                 spd.pages[i] = page;
8083                 spd.partial[i].len = PAGE_SIZE;
8084                 spd.partial[i].offset = 0;
8085                 spd.partial[i].private = (unsigned long)ref;
8086                 spd.nr_pages++;
8087                 *ppos += PAGE_SIZE;
8088
8089                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8090         }
8091
8092         trace_access_unlock(iter->cpu_file);
8093         spd.nr_pages = i;
8094
8095         /* did we read anything? */
8096         if (!spd.nr_pages) {
8097                 if (ret)
8098                         goto out;
8099
8100                 ret = -EAGAIN;
8101                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8102                         goto out;
8103
8104                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8105                 if (ret)
8106                         goto out;
8107
8108                 goto again;
8109         }
8110
8111         ret = splice_to_pipe(pipe, &spd);
8112 out:
8113         splice_shrink_spd(&spd);
8114
8115         return ret;
8116 }
8117
8118 static const struct file_operations tracing_buffers_fops = {
8119         .open           = tracing_buffers_open,
8120         .read           = tracing_buffers_read,
8121         .poll           = tracing_buffers_poll,
8122         .release        = tracing_buffers_release,
8123         .splice_read    = tracing_buffers_splice_read,
8124         .llseek         = no_llseek,
8125 };
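/*
 * User-space sketch (editorial addition, not part of this file): these ops
 * back the per-CPU "trace_pipe_raw" files, which hand out whole ring-buffer
 * pages, so consumers typically splice() page-sized chunks rather than doing
 * byte-wise read()s. The path and page size below are assumptions for a
 * 4K-page system with tracefs mounted in the usual place; #includes and
 * error handling are elided:
 *
 *	int tfd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	// Move one binary ring-buffer page into the pipe without copying.
 *	splice(tfd, NULL, pfd[1], NULL, 4096, SPLICE_F_MOVE);
 */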
8126
8127 static ssize_t
8128 tracing_stats_read(struct file *filp, char __user *ubuf,
8129                    size_t count, loff_t *ppos)
8130 {
8131         struct inode *inode = file_inode(filp);
8132         struct trace_array *tr = inode->i_private;
8133         struct array_buffer *trace_buf = &tr->array_buffer;
8134         int cpu = tracing_get_cpu(inode);
8135         struct trace_seq *s;
8136         unsigned long cnt;
8137         unsigned long long t;
8138         unsigned long usec_rem;
8139
8140         s = kmalloc(sizeof(*s), GFP_KERNEL);
8141         if (!s)
8142                 return -ENOMEM;
8143
8144         trace_seq_init(s);
8145
8146         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8147         trace_seq_printf(s, "entries: %ld\n", cnt);
8148
8149         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8150         trace_seq_printf(s, "overrun: %ld\n", cnt);
8151
8152         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8153         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8154
8155         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8156         trace_seq_printf(s, "bytes: %ld\n", cnt);
8157
8158         if (trace_clocks[tr->clock_id].in_ns) {
8159                 /* local or global for trace_clock */
8160                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8161                 usec_rem = do_div(t, USEC_PER_SEC);
8162                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8163                                                                 t, usec_rem);
8164
8165                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8166                 usec_rem = do_div(t, USEC_PER_SEC);
8167                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8168         } else {
8169                 /* counter or tsc mode for trace_clock */
8170                 trace_seq_printf(s, "oldest event ts: %llu\n",
8171                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8172
8173                 trace_seq_printf(s, "now ts: %llu\n",
8174                                 ring_buffer_time_stamp(trace_buf->buffer));
8175         }
8176
8177         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8178         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8179
8180         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8181         trace_seq_printf(s, "read events: %ld\n", cnt);
8182
8183         count = simple_read_from_buffer(ubuf, count, ppos,
8184                                         s->buffer, trace_seq_used(s));
8185
8186         kfree(s);
8187
8188         return count;
8189 }
8190
8191 static const struct file_operations tracing_stats_fops = {
8192         .open           = tracing_open_generic_tr,
8193         .read           = tracing_stats_read,
8194         .llseek         = generic_file_llseek,
8195         .release        = tracing_release_generic_tr,
8196 };
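/*
 * Example output (editorial illustration, values made up): reading a per-CPU
 * "stats" file backed by these ops with a nanosecond trace clock produces one
 * line per trace_seq_printf() call above, e.g.:
 *
 *	entries: 4512
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 215872
 *	oldest event ts:  5260.291421
 *	now ts:  5279.909427
 *	dropped events: 0
 *	read events: 4512
 */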
8197
8198 #ifdef CONFIG_DYNAMIC_FTRACE
8199
8200 static ssize_t
8201 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8202                   size_t cnt, loff_t *ppos)
8203 {
8204         ssize_t ret;
8205         char *buf;
8206         int r;
8207
8208         /* 256 should be plenty to hold the amount needed */
8209         buf = kmalloc(256, GFP_KERNEL);
8210         if (!buf)
8211                 return -ENOMEM;
8212
8213         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8214                       ftrace_update_tot_cnt,
8215                       ftrace_number_of_pages,
8216                       ftrace_number_of_groups);
8217
8218         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8219         kfree(buf);
8220         return ret;
8221 }
8222
8223 static const struct file_operations tracing_dyn_info_fops = {
8224         .open           = tracing_open_generic,
8225         .read           = tracing_read_dyn_info,
8226         .llseek         = generic_file_llseek,
8227 };
8228 #endif /* CONFIG_DYNAMIC_FTRACE */
8229
8230 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8231 static void
8232 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8233                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8234                 void *data)
8235 {
8236         tracing_snapshot_instance(tr);
8237 }
8238
8239 static void
8240 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8241                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8242                       void *data)
8243 {
8244         struct ftrace_func_mapper *mapper = data;
8245         long *count = NULL;
8246
8247         if (mapper)
8248                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8249
8250         if (count) {
8251
8252                 if (*count <= 0)
8253                         return;
8254
8255                 (*count)--;
8256         }
8257
8258         tracing_snapshot_instance(tr);
8259 }
8260
8261 static int
8262 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8263                       struct ftrace_probe_ops *ops, void *data)
8264 {
8265         struct ftrace_func_mapper *mapper = data;
8266         long *count = NULL;
8267
8268         seq_printf(m, "%ps:", (void *)ip);
8269
8270         seq_puts(m, "snapshot");
8271
8272         if (mapper)
8273                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8274
8275         if (count)
8276                 seq_printf(m, ":count=%ld\n", *count);
8277         else
8278                 seq_puts(m, ":unlimited\n");
8279
8280         return 0;
8281 }
8282
8283 static int
8284 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8285                      unsigned long ip, void *init_data, void **data)
8286 {
8287         struct ftrace_func_mapper *mapper = *data;
8288
8289         if (!mapper) {
8290                 mapper = allocate_ftrace_func_mapper();
8291                 if (!mapper)
8292                         return -ENOMEM;
8293                 *data = mapper;
8294         }
8295
8296         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8297 }
8298
8299 static void
8300 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8301                      unsigned long ip, void *data)
8302 {
8303         struct ftrace_func_mapper *mapper = data;
8304
8305         if (!ip) {
8306                 if (!mapper)
8307                         return;
8308                 free_ftrace_func_mapper(mapper, NULL);
8309                 return;
8310         }
8311
8312         ftrace_func_mapper_remove_ip(mapper, ip);
8313 }
8314
8315 static struct ftrace_probe_ops snapshot_probe_ops = {
8316         .func                   = ftrace_snapshot,
8317         .print                  = ftrace_snapshot_print,
8318 };
8319
8320 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8321         .func                   = ftrace_count_snapshot,
8322         .print                  = ftrace_snapshot_print,
8323         .init                   = ftrace_snapshot_init,
8324         .free                   = ftrace_snapshot_free,
8325 };
8326
8327 static int
8328 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8329                                char *glob, char *cmd, char *param, int enable)
8330 {
8331         struct ftrace_probe_ops *ops;
8332         void *count = (void *)-1;
8333         char *number;
8334         int ret;
8335
8336         if (!tr)
8337                 return -ENODEV;
8338
8339         /* hash funcs only work with set_ftrace_filter */
8340         if (!enable)
8341                 return -EINVAL;
8342
8343         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8344
8345         if (glob[0] == '!')
8346                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8347
8348         if (!param)
8349                 goto out_reg;
8350
8351         number = strsep(&param, ":");
8352
8353         if (!strlen(number))
8354                 goto out_reg;
8355
8356         /*
8357          * We use the callback data field (which is a pointer)
8358          * as our counter.
8359          */
8360         ret = kstrtoul(number, 0, (unsigned long *)&count);
8361         if (ret)
8362                 return ret;
8363
8364  out_reg:
8365         ret = tracing_alloc_snapshot_instance(tr);
8366         if (ret < 0)
8367                 goto out;
8368
8369         ret = register_ftrace_function_probe(glob, tr, ops, count);
8370
8371  out:
8372         return ret < 0 ? ret : 0;
8373 }
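/*
 * Illustrative usage (editorial note): this callback implements the
 * "snapshot" command accepted by set_ftrace_filter in the tracefs directory.
 * The function name is only an example:
 *
 *	echo 'do_sys_open:snapshot'   > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:3' > set_ftrace_filter
 *	echo '!do_sys_open:snapshot'  > set_ftrace_filter
 *
 * The optional ":3" arrives here as @param and bounds how many snapshots
 * ftrace_count_snapshot() will take; the '!' prefix unregisters the probe.
 */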
8374
8375 static struct ftrace_func_command ftrace_snapshot_cmd = {
8376         .name                   = "snapshot",
8377         .func                   = ftrace_trace_snapshot_callback,
8378 };
8379
8380 static __init int register_snapshot_cmd(void)
8381 {
8382         return register_ftrace_command(&ftrace_snapshot_cmd);
8383 }
8384 #else
8385 static inline __init int register_snapshot_cmd(void) { return 0; }
8386 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8387
8388 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8389 {
8390         if (WARN_ON(!tr->dir))
8391                 return ERR_PTR(-ENODEV);
8392
8393         /* Top directory uses NULL as the parent */
8394         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8395                 return NULL;
8396
8397         /* All sub buffers have a descriptor */
8398         return tr->dir;
8399 }
8400
8401 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8402 {
8403         struct dentry *d_tracer;
8404
8405         if (tr->percpu_dir)
8406                 return tr->percpu_dir;
8407
8408         d_tracer = tracing_get_dentry(tr);
8409         if (IS_ERR(d_tracer))
8410                 return NULL;
8411
8412         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8413
8414         MEM_FAIL(!tr->percpu_dir,
8415                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8416
8417         return tr->percpu_dir;
8418 }
8419
8420 static struct dentry *
8421 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8422                       void *data, long cpu, const struct file_operations *fops)
8423 {
8424         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8425
8426         if (ret) /* See tracing_get_cpu() */
8427                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8428         return ret;
8429 }
8430
8431 static void
8432 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8433 {
8434         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8435         struct dentry *d_cpu;
8436         char cpu_dir[30]; /* 30 characters should be more than enough */
8437
8438         if (!d_percpu)
8439                 return;
8440
8441         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8442         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8443         if (!d_cpu) {
8444                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8445                 return;
8446         }
8447
8448         /* per cpu trace_pipe */
8449         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8450                                 tr, cpu, &tracing_pipe_fops);
8451
8452         /* per cpu trace */
8453         trace_create_cpu_file("trace", 0644, d_cpu,
8454                                 tr, cpu, &tracing_fops);
8455
8456         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8457                                 tr, cpu, &tracing_buffers_fops);
8458
8459         trace_create_cpu_file("stats", 0444, d_cpu,
8460                                 tr, cpu, &tracing_stats_fops);
8461
8462         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8463                                 tr, cpu, &tracing_entries_fops);
8464
8465 #ifdef CONFIG_TRACER_SNAPSHOT
8466         trace_create_cpu_file("snapshot", 0644, d_cpu,
8467                                 tr, cpu, &snapshot_fops);
8468
8469         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8470                                 tr, cpu, &snapshot_raw_fops);
8471 #endif
8472 }
8473
8474 #ifdef CONFIG_FTRACE_SELFTEST
8475 /* Let selftest have access to static functions in this file */
8476 #include "trace_selftest.c"
8477 #endif
8478
8479 static ssize_t
8480 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8481                         loff_t *ppos)
8482 {
8483         struct trace_option_dentry *topt = filp->private_data;
8484         char *buf;
8485
8486         if (topt->flags->val & topt->opt->bit)
8487                 buf = "1\n";
8488         else
8489                 buf = "0\n";
8490
8491         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8492 }
8493
8494 static ssize_t
8495 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8496                          loff_t *ppos)
8497 {
8498         struct trace_option_dentry *topt = filp->private_data;
8499         unsigned long val;
8500         int ret;
8501
8502         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8503         if (ret)
8504                 return ret;
8505
8506         if (val != 0 && val != 1)
8507                 return -EINVAL;
8508
8509         if (!!(topt->flags->val & topt->opt->bit) != val) {
8510                 mutex_lock(&trace_types_lock);
8511                 ret = __set_tracer_option(topt->tr, topt->flags,
8512                                           topt->opt, !val);
8513                 mutex_unlock(&trace_types_lock);
8514                 if (ret)
8515                         return ret;
8516         }
8517
8518         *ppos += cnt;
8519
8520         return cnt;
8521 }
8522
8523
8524 static const struct file_operations trace_options_fops = {
8525         .open = tracing_open_generic,
8526         .read = trace_options_read,
8527         .write = trace_options_write,
8528         .llseek = generic_file_llseek,
8529 };
8530
8531 /*
8532  * In order to pass in both the trace_array descriptor as well as the index
8533  * to the flag that the trace option file represents, the trace_array
8534  * has a character array of trace_flags_index[], which holds the index
8535  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8536  * The address of this character array is passed to the flag option file
8537  * read/write callbacks.
8538  *
8539  * In order to extract both the index and the trace_array descriptor,
8540  * get_tr_index() uses the following algorithm.
8541  *
8542  *   idx = *ptr;
8543  *
8544  * Since each array element stores its own index (index[1] == 1),
8545  * dereferencing the pointer yields the position it points at.
8546  *
8547  * To get the trace_array descriptor, subtract that index from the
8548  * pointer to reach the start of the array:
8549  *
8550  *   ptr - idx == &index[0]
8551  *
8552  * Then a simple container_of() from that pointer gets us to the
8553  * trace_array descriptor.
8554  */
8555 static void get_tr_index(void *data, struct trace_array **ptr,
8556                          unsigned int *pindex)
8557 {
8558         *pindex = *(unsigned char *)data;
8559
8560         *ptr = container_of(data - *pindex, struct trace_array,
8561                             trace_flags_index);
8562 }
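/*
 * Worked example (editorial addition): if @data points at
 * tr->trace_flags_index[5], then
 *
 *	*pindex = *(unsigned char *)data;	// == 5
 *	data - *pindex == &tr->trace_flags_index[0];
 *	container_of(data - *pindex, struct trace_array,
 *		     trace_flags_index) == tr;	// back to the descriptor
 */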
8563
8564 static ssize_t
8565 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8566                         loff_t *ppos)
8567 {
8568         void *tr_index = filp->private_data;
8569         struct trace_array *tr;
8570         unsigned int index;
8571         char *buf;
8572
8573         get_tr_index(tr_index, &tr, &index);
8574
8575         if (tr->trace_flags & (1 << index))
8576                 buf = "1\n";
8577         else
8578                 buf = "0\n";
8579
8580         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8581 }
8582
8583 static ssize_t
8584 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8585                          loff_t *ppos)
8586 {
8587         void *tr_index = filp->private_data;
8588         struct trace_array *tr;
8589         unsigned int index;
8590         unsigned long val;
8591         int ret;
8592
8593         get_tr_index(tr_index, &tr, &index);
8594
8595         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8596         if (ret)
8597                 return ret;
8598
8599         if (val != 0 && val != 1)
8600                 return -EINVAL;
8601
8602         mutex_lock(&event_mutex);
8603         mutex_lock(&trace_types_lock);
8604         ret = set_tracer_flag(tr, 1 << index, val);
8605         mutex_unlock(&trace_types_lock);
8606         mutex_unlock(&event_mutex);
8607
8608         if (ret < 0)
8609                 return ret;
8610
8611         *ppos += cnt;
8612
8613         return cnt;
8614 }
8615
8616 static const struct file_operations trace_options_core_fops = {
8617         .open = tracing_open_generic,
8618         .read = trace_options_core_read,
8619         .write = trace_options_core_write,
8620         .llseek = generic_file_llseek,
8621 };
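/*
 * Usage sketch (editorial note): the files created from these ops live in the
 * per-instance "options" directory and take "0" or "1"; the option name below
 * is chosen for illustration:
 *
 *	echo 1 > options/sym-offset
 *	echo 0 > options/sym-offset
 *
 * The same flags can also be toggled by writing "sym-offset" or
 * "nosym-offset" to the trace_options file.
 */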
8622
8623 struct dentry *trace_create_file(const char *name,
8624                                  umode_t mode,
8625                                  struct dentry *parent,
8626                                  void *data,
8627                                  const struct file_operations *fops)
8628 {
8629         struct dentry *ret;
8630
8631         ret = tracefs_create_file(name, mode, parent, data, fops);
8632         if (!ret)
8633                 pr_warn("Could not create tracefs '%s' entry\n", name);
8634
8635         return ret;
8636 }
8637
8638
8639 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8640 {
8641         struct dentry *d_tracer;
8642
8643         if (tr->options)
8644                 return tr->options;
8645
8646         d_tracer = tracing_get_dentry(tr);
8647         if (IS_ERR(d_tracer))
8648                 return NULL;
8649
8650         tr->options = tracefs_create_dir("options", d_tracer);
8651         if (!tr->options) {
8652                 pr_warn("Could not create tracefs directory 'options'\n");
8653                 return NULL;
8654         }
8655
8656         return tr->options;
8657 }
8658
8659 static void
8660 create_trace_option_file(struct trace_array *tr,
8661                          struct trace_option_dentry *topt,
8662                          struct tracer_flags *flags,
8663                          struct tracer_opt *opt)
8664 {
8665         struct dentry *t_options;
8666
8667         t_options = trace_options_init_dentry(tr);
8668         if (!t_options)
8669                 return;
8670
8671         topt->flags = flags;
8672         topt->opt = opt;
8673         topt->tr = tr;
8674
8675         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8676                                     &trace_options_fops);
8677
8678 }
8679
8680 static void
8681 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8682 {
8683         struct trace_option_dentry *topts;
8684         struct trace_options *tr_topts;
8685         struct tracer_flags *flags;
8686         struct tracer_opt *opts;
8687         int cnt;
8688         int i;
8689
8690         if (!tracer)
8691                 return;
8692
8693         flags = tracer->flags;
8694
8695         if (!flags || !flags->opts)
8696                 return;
8697
8698         /*
8699          * If this is an instance, only create flags for tracers
8700          * the instance may have.
8701          */
8702         if (!trace_ok_for_array(tracer, tr))
8703                 return;
8704
8705         for (i = 0; i < tr->nr_topts; i++) {
8706                 /* Make sure there are no duplicate flags. */
8707                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8708                         return;
8709         }
8710
8711         opts = flags->opts;
8712
8713         for (cnt = 0; opts[cnt].name; cnt++)
8714                 ;
8715
8716         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8717         if (!topts)
8718                 return;
8719
8720         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8721                             GFP_KERNEL);
8722         if (!tr_topts) {
8723                 kfree(topts);
8724                 return;
8725         }
8726
8727         tr->topts = tr_topts;
8728         tr->topts[tr->nr_topts].tracer = tracer;
8729         tr->topts[tr->nr_topts].topts = topts;
8730         tr->nr_topts++;
8731
8732         for (cnt = 0; opts[cnt].name; cnt++) {
8733                 create_trace_option_file(tr, &topts[cnt], flags,
8734                                          &opts[cnt]);
8735                 MEM_FAIL(topts[cnt].entry == NULL,
8736                           "Failed to create trace option: %s",
8737                           opts[cnt].name);
8738         }
8739 }
8740
8741 static struct dentry *
8742 create_trace_option_core_file(struct trace_array *tr,
8743                               const char *option, long index)
8744 {
8745         struct dentry *t_options;
8746
8747         t_options = trace_options_init_dentry(tr);
8748         if (!t_options)
8749                 return NULL;
8750
8751         return trace_create_file(option, 0644, t_options,
8752                                  (void *)&tr->trace_flags_index[index],
8753                                  &trace_options_core_fops);
8754 }
8755
8756 static void create_trace_options_dir(struct trace_array *tr)
8757 {
8758         struct dentry *t_options;
8759         bool top_level = tr == &global_trace;
8760         int i;
8761
8762         t_options = trace_options_init_dentry(tr);
8763         if (!t_options)
8764                 return;
8765
8766         for (i = 0; trace_options[i]; i++) {
8767                 if (top_level ||
8768                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8769                         create_trace_option_core_file(tr, trace_options[i], i);
8770         }
8771 }
8772
8773 static ssize_t
8774 rb_simple_read(struct file *filp, char __user *ubuf,
8775                size_t cnt, loff_t *ppos)
8776 {
8777         struct trace_array *tr = filp->private_data;
8778         char buf[64];
8779         int r;
8780
8781         r = tracer_tracing_is_on(tr);
8782         r = sprintf(buf, "%d\n", r);
8783
8784         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8785 }
8786
8787 static ssize_t
8788 rb_simple_write(struct file *filp, const char __user *ubuf,
8789                 size_t cnt, loff_t *ppos)
8790 {
8791         struct trace_array *tr = filp->private_data;
8792         struct trace_buffer *buffer = tr->array_buffer.buffer;
8793         unsigned long val;
8794         int ret;
8795
8796         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8797         if (ret)
8798                 return ret;
8799
8800         if (buffer) {
8801                 mutex_lock(&trace_types_lock);
8802                 if (!!val == tracer_tracing_is_on(tr)) {
8803                         val = 0; /* do nothing */
8804                 } else if (val) {
8805                         tracer_tracing_on(tr);
8806                         if (tr->current_trace->start)
8807                                 tr->current_trace->start(tr);
8808                 } else {
8809                         tracer_tracing_off(tr);
8810                         if (tr->current_trace->stop)
8811                                 tr->current_trace->stop(tr);
8812                 }
8813                 mutex_unlock(&trace_types_lock);
8814         }
8815
8816         (*ppos)++;
8817
8818         return cnt;
8819 }
8820
8821 static const struct file_operations rb_simple_fops = {
8822         .open           = tracing_open_generic_tr,
8823         .read           = rb_simple_read,
8824         .write          = rb_simple_write,
8825         .release        = tracing_release_generic_tr,
8826         .llseek         = default_llseek,
8827 };
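/*
 * Editorial note: rb_simple_fops backs the per-instance "tracing_on" file
 * (created in init_tracer_tracefs() below). Writing "0" stops recording into
 * the ring buffer and "1" resumes it, without switching tracers:
 *
 *	echo 0 > tracing_on
 *	echo 1 > tracing_on
 */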
8828
8829 static ssize_t
8830 buffer_percent_read(struct file *filp, char __user *ubuf,
8831                     size_t cnt, loff_t *ppos)
8832 {
8833         struct trace_array *tr = filp->private_data;
8834         char buf[64];
8835         int r;
8836
8837         r = tr->buffer_percent;
8838         r = sprintf(buf, "%d\n", r);
8839
8840         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8841 }
8842
8843 static ssize_t
8844 buffer_percent_write(struct file *filp, const char __user *ubuf,
8845                      size_t cnt, loff_t *ppos)
8846 {
8847         struct trace_array *tr = filp->private_data;
8848         unsigned long val;
8849         int ret;
8850
8851         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8852         if (ret)
8853                 return ret;
8854
8855         if (val > 100)
8856                 return -EINVAL;
8857
8858         if (!val)
8859                 val = 1;
8860
8861         tr->buffer_percent = val;
8862
8863         (*ppos)++;
8864
8865         return cnt;
8866 }
8867
8868 static const struct file_operations buffer_percent_fops = {
8869         .open           = tracing_open_generic_tr,
8870         .read           = buffer_percent_read,
8871         .write          = buffer_percent_write,
8872         .release        = tracing_release_generic_tr,
8873         .llseek         = default_llseek,
8874 };
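/*
 * Editorial note: "buffer_percent" controls how full the ring buffer must be
 * before a blocked reader is woken; tracing_buffers_splice_read() above
 * passes tr->buffer_percent to wait_on_pipe(). The write handler rejects
 * values above 100 and bumps 0 up to 1, and the default is 50 (see
 * init_tracer_tracefs()). For example:
 *
 *	echo 100 > buffer_percent	# wake readers only when the buffer is full
 */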
8875
8876 static struct dentry *trace_instance_dir;
8877
8878 static void
8879 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8880
8881 static int
8882 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8883 {
8884         enum ring_buffer_flags rb_flags;
8885
8886         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8887
8888         buf->tr = tr;
8889
8890         buf->buffer = ring_buffer_alloc(size, rb_flags);
8891         if (!buf->buffer)
8892                 return -ENOMEM;
8893
8894         buf->data = alloc_percpu(struct trace_array_cpu);
8895         if (!buf->data) {
8896                 ring_buffer_free(buf->buffer);
8897                 buf->buffer = NULL;
8898                 return -ENOMEM;
8899         }
8900
8901         /* Allocate the first page for all buffers */
8902         set_buffer_entries(&tr->array_buffer,
8903                            ring_buffer_size(tr->array_buffer.buffer, 0));
8904
8905         return 0;
8906 }
8907
8908 static int allocate_trace_buffers(struct trace_array *tr, int size)
8909 {
8910         int ret;
8911
8912         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8913         if (ret)
8914                 return ret;
8915
8916 #ifdef CONFIG_TRACER_MAX_TRACE
8917         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8918                                     allocate_snapshot ? size : 1);
8919         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8920                 ring_buffer_free(tr->array_buffer.buffer);
8921                 tr->array_buffer.buffer = NULL;
8922                 free_percpu(tr->array_buffer.data);
8923                 tr->array_buffer.data = NULL;
8924                 return -ENOMEM;
8925         }
8926         tr->allocated_snapshot = allocate_snapshot;
8927
8928         /*
8929          * Only the top level trace array gets its snapshot allocated
8930          * from the kernel command line.
8931          */
8932         allocate_snapshot = false;
8933 #endif
8934
8935         return 0;
8936 }
8937
8938 static void free_trace_buffer(struct array_buffer *buf)
8939 {
8940         if (buf->buffer) {
8941                 ring_buffer_free(buf->buffer);
8942                 buf->buffer = NULL;
8943                 free_percpu(buf->data);
8944                 buf->data = NULL;
8945         }
8946 }
8947
8948 static void free_trace_buffers(struct trace_array *tr)
8949 {
8950         if (!tr)
8951                 return;
8952
8953         free_trace_buffer(&tr->array_buffer);
8954
8955 #ifdef CONFIG_TRACER_MAX_TRACE
8956         free_trace_buffer(&tr->max_buffer);
8957 #endif
8958 }
8959
8960 static void init_trace_flags_index(struct trace_array *tr)
8961 {
8962         int i;
8963
8964         /* Used by the trace options files */
8965         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8966                 tr->trace_flags_index[i] = i;
8967 }
8968
8969 static void __update_tracer_options(struct trace_array *tr)
8970 {
8971         struct tracer *t;
8972
8973         for (t = trace_types; t; t = t->next)
8974                 add_tracer_options(tr, t);
8975 }
8976
8977 static void update_tracer_options(struct trace_array *tr)
8978 {
8979         mutex_lock(&trace_types_lock);
8980         __update_tracer_options(tr);
8981         mutex_unlock(&trace_types_lock);
8982 }
8983
8984 /* Must have trace_types_lock held */
8985 struct trace_array *trace_array_find(const char *instance)
8986 {
8987         struct trace_array *tr, *found = NULL;
8988
8989         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8990                 if (tr->name && strcmp(tr->name, instance) == 0) {
8991                         found = tr;
8992                         break;
8993                 }
8994         }
8995
8996         return found;
8997 }
8998
8999 struct trace_array *trace_array_find_get(const char *instance)
9000 {
9001         struct trace_array *tr;
9002
9003         mutex_lock(&trace_types_lock);
9004         tr = trace_array_find(instance);
9005         if (tr)
9006                 tr->ref++;
9007         mutex_unlock(&trace_types_lock);
9008
9009         return tr;
9010 }
9011
9012 static int trace_array_create_dir(struct trace_array *tr)
9013 {
9014         int ret;
9015
9016         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9017         if (!tr->dir)
9018                 return -EINVAL;
9019
9020         ret = event_trace_add_tracer(tr->dir, tr);
9021         if (ret) {
9022                 tracefs_remove(tr->dir);
9023                 return ret;
9024         }
9025
9026         init_tracer_tracefs(tr, tr->dir);
9027         __update_tracer_options(tr);
9028
9029         return ret;
9030 }
9031
9032 static struct trace_array *trace_array_create(const char *name)
9033 {
9034         struct trace_array *tr;
9035         int ret;
9036
9037         ret = -ENOMEM;
9038         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9039         if (!tr)
9040                 return ERR_PTR(ret);
9041
9042         tr->name = kstrdup(name, GFP_KERNEL);
9043         if (!tr->name)
9044                 goto out_free_tr;
9045
9046         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9047                 goto out_free_tr;
9048
9049         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9050
9051         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9052
9053         raw_spin_lock_init(&tr->start_lock);
9054
9055         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9056
9057         tr->current_trace = &nop_trace;
9058
9059         INIT_LIST_HEAD(&tr->systems);
9060         INIT_LIST_HEAD(&tr->events);
9061         INIT_LIST_HEAD(&tr->hist_vars);
9062         INIT_LIST_HEAD(&tr->err_log);
9063
9064         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9065                 goto out_free_tr;
9066
9067         if (ftrace_allocate_ftrace_ops(tr) < 0)
9068                 goto out_free_tr;
9069
9070         ftrace_init_trace_array(tr);
9071
9072         init_trace_flags_index(tr);
9073
9074         if (trace_instance_dir) {
9075                 ret = trace_array_create_dir(tr);
9076                 if (ret)
9077                         goto out_free_tr;
9078         } else
9079                 __trace_early_add_events(tr);
9080
9081         list_add(&tr->list, &ftrace_trace_arrays);
9082
9083         tr->ref++;
9084
9085         return tr;
9086
9087  out_free_tr:
9088         ftrace_free_ftrace_ops(tr);
9089         free_trace_buffers(tr);
9090         free_cpumask_var(tr->tracing_cpumask);
9091         kfree(tr->name);
9092         kfree(tr);
9093
9094         return ERR_PTR(ret);
9095 }
9096
9097 static int instance_mkdir(const char *name)
9098 {
9099         struct trace_array *tr;
9100         int ret;
9101
9102         mutex_lock(&event_mutex);
9103         mutex_lock(&trace_types_lock);
9104
9105         ret = -EEXIST;
9106         if (trace_array_find(name))
9107                 goto out_unlock;
9108
9109         tr = trace_array_create(name);
9110
9111         ret = PTR_ERR_OR_ZERO(tr);
9112
9113 out_unlock:
9114         mutex_unlock(&trace_types_lock);
9115         mutex_unlock(&event_mutex);
9116         return ret;
9117 }
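/*
 * Editorial note: instance_mkdir() is invoked by tracefs when user space
 * creates a directory under "instances", giving that instance its own
 * buffers and control files. Assuming the usual mount point:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo	# handled by instance_rmdir()
 */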
9118
9119 /**
9120  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9121  * @name: The name of the trace array to be looked up/created.
9122  *
9123  * Returns a pointer to the trace array with the given name, or
9124  * NULL if it cannot be created.
9125  *
9126  * NOTE: This function increments the reference counter associated with the
9127  * trace array returned. This makes sure it cannot be freed while in use.
9128  * Use trace_array_put() once the trace array is no longer needed.
9129  * If the trace_array is to be freed, trace_array_destroy() needs to
9130  * be called after the trace_array_put(), or simply let user space delete
9131  * it from the tracefs instances directory. But until the
9132  * trace_array_put() is called, user space cannot delete it.
9133  *
9134  */
9135 struct trace_array *trace_array_get_by_name(const char *name)
9136 {
9137         struct trace_array *tr;
9138
9139         mutex_lock(&event_mutex);
9140         mutex_lock(&trace_types_lock);
9141
9142         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9143                 if (tr->name && strcmp(tr->name, name) == 0)
9144                         goto out_unlock;
9145         }
9146
9147         tr = trace_array_create(name);
9148
9149         if (IS_ERR(tr))
9150                 tr = NULL;
9151 out_unlock:
9152         if (tr)
9153                 tr->ref++;
9154
9155         mutex_unlock(&trace_types_lock);
9156         mutex_unlock(&event_mutex);
9157         return tr;
9158 }
9159 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
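/*
 * Minimal in-kernel usage sketch (editorial addition): the body of a
 * hypothetical module init function; the instance name is arbitrary and
 * error handling is reduced to the essentials:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	// ... use the instance, e.g. enable events on it ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should go away
 */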
9160
9161 static int __remove_instance(struct trace_array *tr)
9162 {
9163         int i;
9164
9165         /* Reference counter for a newly created trace array = 1. */
9166         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9167                 return -EBUSY;
9168
9169         list_del(&tr->list);
9170
9171         /* Disable all the flags that were enabled coming in */
9172         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9173                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9174                         set_tracer_flag(tr, 1 << i, 0);
9175         }
9176
9177         tracing_set_nop(tr);
9178         clear_ftrace_function_probes(tr);
9179         event_trace_del_tracer(tr);
9180         ftrace_clear_pids(tr);
9181         ftrace_destroy_function_files(tr);
9182         tracefs_remove(tr->dir);
9183         free_percpu(tr->last_func_repeats);
9184         free_trace_buffers(tr);
9185
9186         for (i = 0; i < tr->nr_topts; i++) {
9187                 kfree(tr->topts[i].topts);
9188         }
9189         kfree(tr->topts);
9190
9191         free_cpumask_var(tr->tracing_cpumask);
9192         kfree(tr->name);
9193         kfree(tr);
9194
9195         return 0;
9196 }
9197
9198 int trace_array_destroy(struct trace_array *this_tr)
9199 {
9200         struct trace_array *tr;
9201         int ret;
9202
9203         if (!this_tr)
9204                 return -EINVAL;
9205
9206         mutex_lock(&event_mutex);
9207         mutex_lock(&trace_types_lock);
9208
9209         ret = -ENODEV;
9210
9211         /* Make sure the trace array exists before destroying it. */
9212         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9213                 if (tr == this_tr) {
9214                         ret = __remove_instance(tr);
9215                         break;
9216                 }
9217         }
9218
9219         mutex_unlock(&trace_types_lock);
9220         mutex_unlock(&event_mutex);
9221
9222         return ret;
9223 }
9224 EXPORT_SYMBOL_GPL(trace_array_destroy);
9225
9226 static int instance_rmdir(const char *name)
9227 {
9228         struct trace_array *tr;
9229         int ret;
9230
9231         mutex_lock(&event_mutex);
9232         mutex_lock(&trace_types_lock);
9233
9234         ret = -ENODEV;
9235         tr = trace_array_find(name);
9236         if (tr)
9237                 ret = __remove_instance(tr);
9238
9239         mutex_unlock(&trace_types_lock);
9240         mutex_unlock(&event_mutex);
9241
9242         return ret;
9243 }
9244
9245 static __init void create_trace_instances(struct dentry *d_tracer)
9246 {
9247         struct trace_array *tr;
9248
9249         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9250                                                          instance_mkdir,
9251                                                          instance_rmdir);
9252         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9253                 return;
9254
9255         mutex_lock(&event_mutex);
9256         mutex_lock(&trace_types_lock);
9257
9258         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9259                 if (!tr->name)
9260                         continue;
9261                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9262                              "Failed to create instance directory\n"))
9263                         break;
9264         }
9265
9266         mutex_unlock(&trace_types_lock);
9267         mutex_unlock(&event_mutex);
9268 }
9269
9270 static void
9271 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9272 {
9273         struct trace_event_file *file;
9274         int cpu;
9275
9276         trace_create_file("available_tracers", 0444, d_tracer,
9277                         tr, &show_traces_fops);
9278
9279         trace_create_file("current_tracer", 0644, d_tracer,
9280                         tr, &set_tracer_fops);
9281
9282         trace_create_file("tracing_cpumask", 0644, d_tracer,
9283                           tr, &tracing_cpumask_fops);
9284
9285         trace_create_file("trace_options", 0644, d_tracer,
9286                           tr, &tracing_iter_fops);
9287
9288         trace_create_file("trace", 0644, d_tracer,
9289                           tr, &tracing_fops);
9290
9291         trace_create_file("trace_pipe", 0444, d_tracer,
9292                           tr, &tracing_pipe_fops);
9293
9294         trace_create_file("buffer_size_kb", 0644, d_tracer,
9295                           tr, &tracing_entries_fops);
9296
9297         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9298                           tr, &tracing_total_entries_fops);
9299
9300         trace_create_file("free_buffer", 0200, d_tracer,
9301                           tr, &tracing_free_buffer_fops);
9302
9303         trace_create_file("trace_marker", 0220, d_tracer,
9304                           tr, &tracing_mark_fops);
9305
9306         file = __find_event_file(tr, "ftrace", "print");
9307         if (file && file->dir)
9308                 trace_create_file("trigger", 0644, file->dir, file,
9309                                   &event_trigger_fops);
9310         tr->trace_marker_file = file;
9311
9312         trace_create_file("trace_marker_raw", 0220, d_tracer,
9313                           tr, &tracing_mark_raw_fops);
9314
9315         trace_create_file("trace_clock", 0644, d_tracer, tr,
9316                           &trace_clock_fops);
9317
9318         trace_create_file("tracing_on", 0644, d_tracer,
9319                           tr, &rb_simple_fops);
9320
9321         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9322                           &trace_time_stamp_mode_fops);
9323
9324         tr->buffer_percent = 50;
9325
9326         trace_create_file("buffer_percent", 0444, d_tracer,
9327                         tr, &buffer_percent_fops);
9328
9329         create_trace_options_dir(tr);
9330
9331 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9332         trace_create_maxlat_file(tr, d_tracer);
9333 #endif
9334
9335         if (ftrace_create_function_files(tr, d_tracer))
9336                 MEM_FAIL(1, "Could not allocate function filter files");
9337
9338 #ifdef CONFIG_TRACER_SNAPSHOT
9339         trace_create_file("snapshot", 0644, d_tracer,
9340                           tr, &snapshot_fops);
9341 #endif
9342
9343         trace_create_file("error_log", 0644, d_tracer,
9344                           tr, &tracing_err_log_fops);
9345
9346         for_each_tracing_cpu(cpu)
9347                 tracing_init_tracefs_percpu(tr, cpu);
9348
9349         ftrace_init_tracefs(tr, d_tracer);
9350 }
9351
9352 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9353 {
9354         struct vfsmount *mnt;
9355         struct file_system_type *type;
9356
9357         /*
9358          * To maintain backward compatibility for tools that mount
9359          * debugfs to get to the tracing facility, tracefs is automatically
9360          * mounted to the debugfs/tracing directory.
9361          */
9362         type = get_fs_type("tracefs");
9363         if (!type)
9364                 return NULL;
9365         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9366         put_filesystem(type);
9367         if (IS_ERR(mnt))
9368                 return NULL;
9369         mntget(mnt);
9370
9371         return mnt;
9372 }
9373
9374 /**
9375  * tracing_init_dentry - initialize top level trace array
9376  *
9377  * This is called when creating files or directories in the tracing
9378  * directory. It is called via fs_initcall() by any of the boot up code
9379  * and expects to return the dentry of the top level tracing directory.
9380  */
9381 int tracing_init_dentry(void)
9382 {
9383         struct trace_array *tr = &global_trace;
9384
9385         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9386                 pr_warn("Tracing disabled due to lockdown\n");
9387                 return -EPERM;
9388         }
9389
9390         /* The top level trace array uses NULL as parent */
9391         if (tr->dir)
9392                 return 0;
9393
9394         if (WARN_ON(!tracefs_initialized()))
9395                 return -ENODEV;
9396
9397         /*
9398          * As there may still be users that expect the tracing
9399          * files to exist in debugfs/tracing, we must automount
9400          * the tracefs file system there, so older tools still
9401          * work with the newer kernel.
9402          */
9403         tr->dir = debugfs_create_automount("tracing", NULL,
9404                                            trace_automount, NULL);
9405
9406         return 0;
9407 }
9408
9409 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9410 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9411
9412 static struct workqueue_struct *eval_map_wq __initdata;
9413 static struct work_struct eval_map_work __initdata;
9414
9415 static void __init eval_map_work_func(struct work_struct *work)
9416 {
9417         int len;
9418
9419         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9420         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9421 }
9422
9423 static int __init trace_eval_init(void)
9424 {
9425         INIT_WORK(&eval_map_work, eval_map_work_func);
9426
9427         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9428         if (!eval_map_wq) {
9429                 pr_err("Unable to allocate eval_map_wq\n");
9430                 /* Do work here */
9431                 eval_map_work_func(&eval_map_work);
9432                 return -ENOMEM;
9433         }
9434
9435         queue_work(eval_map_wq, &eval_map_work);
9436         return 0;
9437 }
9438
9439 static int __init trace_eval_sync(void)
9440 {
9441         /* Make sure the eval map updates are finished */
9442         if (eval_map_wq)
9443                 destroy_workqueue(eval_map_wq);
9444         return 0;
9445 }
9446
9447 late_initcall_sync(trace_eval_sync);
9448
9449
9450 #ifdef CONFIG_MODULES
9451 static void trace_module_add_evals(struct module *mod)
9452 {
9453         if (!mod->num_trace_evals)
9454                 return;
9455
9456         /*
9457          * Modules with bad taint do not have events created; do not
9458          * bother with their eval maps either.
9459          */
9460         if (trace_module_has_bad_taint(mod))
9461                 return;
9462
9463         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9464 }
9465
9466 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9467 static void trace_module_remove_evals(struct module *mod)
9468 {
9469         union trace_eval_map_item *map;
9470         union trace_eval_map_item **last = &trace_eval_maps;
9471
9472         if (!mod->num_trace_evals)
9473                 return;
9474
9475         mutex_lock(&trace_eval_mutex);
9476
9477         map = trace_eval_maps;
9478
9479         while (map) {
9480                 if (map->head.mod == mod)
9481                         break;
9482                 map = trace_eval_jmp_to_tail(map);
9483                 last = &map->tail.next;
9484                 map = map->tail.next;
9485         }
9486         if (!map)
9487                 goto out;
9488
9489         *last = trace_eval_jmp_to_tail(map)->tail.next;
9490         kfree(map);
9491  out:
9492         mutex_unlock(&trace_eval_mutex);
9493 }
9494 #else
9495 static inline void trace_module_remove_evals(struct module *mod) { }
9496 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9497
9498 static int trace_module_notify(struct notifier_block *self,
9499                                unsigned long val, void *data)
9500 {
9501         struct module *mod = data;
9502
9503         switch (val) {
9504         case MODULE_STATE_COMING:
9505                 trace_module_add_evals(mod);
9506                 break;
9507         case MODULE_STATE_GOING:
9508                 trace_module_remove_evals(mod);
9509                 break;
9510         }
9511
9512         return NOTIFY_OK;
9513 }
9514
9515 static struct notifier_block trace_module_nb = {
9516         .notifier_call = trace_module_notify,
9517         .priority = 0,
9518 };
9519 #endif /* CONFIG_MODULES */
9520
9521 static __init int tracer_init_tracefs(void)
9522 {
9523         int ret;
9524
9525         trace_access_lock_init();
9526
9527         ret = tracing_init_dentry();
9528         if (ret)
9529                 return 0;
9530
9531         event_trace_init();
9532
9533         init_tracer_tracefs(&global_trace, NULL);
9534         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9535
9536         trace_create_file("tracing_thresh", 0644, NULL,
9537                         &global_trace, &tracing_thresh_fops);
9538
9539         trace_create_file("README", 0444, NULL,
9540                         NULL, &tracing_readme_fops);
9541
9542         trace_create_file("saved_cmdlines", 0444, NULL,
9543                         NULL, &tracing_saved_cmdlines_fops);
9544
9545         trace_create_file("saved_cmdlines_size", 0644, NULL,
9546                           NULL, &tracing_saved_cmdlines_size_fops);
9547
9548         trace_create_file("saved_tgids", 0444, NULL,
9549                         NULL, &tracing_saved_tgids_fops);
9550
9551         trace_eval_init();
9552
9553         trace_create_eval_file(NULL);
9554
9555 #ifdef CONFIG_MODULES
9556         register_module_notifier(&trace_module_nb);
9557 #endif
9558
9559 #ifdef CONFIG_DYNAMIC_FTRACE
9560         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9561                         NULL, &tracing_dyn_info_fops);
9562 #endif
9563
9564         create_trace_instances(NULL);
9565
9566         update_tracer_options(&global_trace);
9567
9568         return 0;
9569 }
9570
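/*
 * Panic/die notifiers: when the ftrace_dump_on_oops option is set (via
 * the kernel command line or sysctl), dump the ring buffer contents to
 * the console as part of handling a panic or an oops.
 */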
9571 static int trace_panic_handler(struct notifier_block *this,
9572                                unsigned long event, void *unused)
9573 {
9574         if (ftrace_dump_on_oops)
9575                 ftrace_dump(ftrace_dump_on_oops);
9576         return NOTIFY_OK;
9577 }
9578
9579 static struct notifier_block trace_panic_notifier = {
9580         .notifier_call  = trace_panic_handler,
9581         .next           = NULL,
9582         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9583 };
9584
9585 static int trace_die_handler(struct notifier_block *self,
9586                              unsigned long val,
9587                              void *data)
9588 {
9589         switch (val) {
9590         case DIE_OOPS:
9591                 if (ftrace_dump_on_oops)
9592                         ftrace_dump(ftrace_dump_on_oops);
9593                 break;
9594         default:
9595                 break;
9596         }
9597         return NOTIFY_OK;
9598 }
9599
9600 static struct notifier_block trace_die_notifier = {
9601         .notifier_call = trace_die_handler,
9602         .priority = 200
9603 };
9604
9605 /*
9606  * printk is set to a max of 1024, but we really don't need it that big.
9607  * Nothing should be printing 1000 characters anyway.
9608  */
9609 #define TRACE_MAX_PRINT         1000
9610
9611 /*
9612  * Define KERN_TRACE here so that we have one place to modify
9613  * it if we decide to change what log level the ftrace dump
9614  * should be printed at.
9615  */
9616 #define KERN_TRACE              KERN_EMERG
9617
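/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level,
 * capping the output at TRACE_MAX_PRINT characters, and reinitialize the
 * sequence so it can be reused for the next entry.
 */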
9618 void
9619 trace_printk_seq(struct trace_seq *s)
9620 {
9621         /* Probably should print a warning here. */
9622         if (s->seq.len >= TRACE_MAX_PRINT)
9623                 s->seq.len = TRACE_MAX_PRINT;
9624
9625         /*
9626          * More paranoid code. Although the buffer size is set to
9627          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9628          * an extra layer of protection.
9629          */
9630         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9631                 s->seq.len = s->seq.size - 1;
9632
9633         /* The buffer should already be NUL-terminated, but we are paranoid. */
9634         s->buffer[s->seq.len] = 0;
9635
9636         printk(KERN_TRACE "%s", s->buffer);
9637
9638         trace_seq_init(s);
9639 }
9640
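/*
 * Set up an iterator over the global trace instance covering all CPUs.
 * Used by emergency dump paths such as ftrace_dump() rather than the
 * normal tracefs open path.
 */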
9641 void trace_init_global_iter(struct trace_iterator *iter)
9642 {
9643         iter->tr = &global_trace;
9644         iter->trace = iter->tr->current_trace;
9645         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9646         iter->array_buffer = &global_trace.array_buffer;
9647
9648         if (iter->trace && iter->trace->open)
9649                 iter->trace->open(iter);
9650
9651         /* Annotate start of buffers if we had overruns */
9652         if (ring_buffer_overruns(iter->array_buffer->buffer))
9653                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9654
9655         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9656         if (trace_clocks[iter->tr->clock_id].in_ns)
9657                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9658 }
9659
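/*
 * Dump the ring buffer contents to the console. Tracing is turned off
 * first, per-CPU recording is disabled for the duration of the dump,
 * and only one dumper is allowed at a time. @oops_dump_mode selects
 * whether all CPU buffers (DUMP_ALL) or only the current CPU's buffer
 * (DUMP_ORIG) is printed.
 */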
9660 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9661 {
9662         /* use static because iter can be a bit big for the stack */
9663         static struct trace_iterator iter;
9664         static atomic_t dump_running;
9665         struct trace_array *tr = &global_trace;
9666         unsigned int old_userobj;
9667         unsigned long flags;
9668         int cnt = 0, cpu;
9669
9670         /* Only allow one dump user at a time. */
9671         if (atomic_inc_return(&dump_running) != 1) {
9672                 atomic_dec(&dump_running);
9673                 return;
9674         }
9675
9676         /*
9677          * Always turn off tracing when we dump.
9678          * We don't need to show trace output of what happens
9679          * between multiple crashes.
9680          *
9681          * If the user does a sysrq-z, then they can re-enable
9682          * tracing with echo 1 > tracing_on.
9683          */
9684         tracing_off();
9685
9686         local_irq_save(flags);
9687         printk_nmi_direct_enter();
9688
9689         /* Simulate the iterator */
9690         trace_init_global_iter(&iter);
9691         /* Cannot use kmalloc for iter.temp and iter.fmt: may be in NMI/panic context */
9692         iter.temp = static_temp_buf;
9693         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9694         iter.fmt = static_fmt_buf;
9695         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9696
9697         for_each_tracing_cpu(cpu) {
9698                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9699         }
9700
9701         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9702
9703         /* don't look at user memory in panic mode */
9704         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9705
9706         switch (oops_dump_mode) {
9707         case DUMP_ALL:
9708                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9709                 break;
9710         case DUMP_ORIG:
9711                 iter.cpu_file = raw_smp_processor_id();
9712                 break;
9713         case DUMP_NONE:
9714                 goto out_enable;
9715         default:
9716                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9717                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9718         }
9719
9720         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9721
9722         /* Did function tracer already get disabled? */
9723         if (ftrace_is_dead()) {
9724                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9725                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9726         }
9727
9728         /*
9729          * We need to stop all tracing on all CPUs to read
9730          * the next buffer. This is a bit expensive, but is
9731          * not done often. We print all that we can read,
9732          * and then release the locks again.
9733          */
9734
9735         while (!trace_empty(&iter)) {
9736
9737                 if (!cnt)
9738                         printk(KERN_TRACE "---------------------------------\n");
9739
9740                 cnt++;
9741
9742                 trace_iterator_reset(&iter);
9743                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9744
9745                 if (trace_find_next_entry_inc(&iter) != NULL) {
9746                         int ret;
9747
9748                         ret = print_trace_line(&iter);
9749                         if (ret != TRACE_TYPE_NO_CONSUME)
9750                                 trace_consume(&iter);
9751                 }
9752                 touch_nmi_watchdog();
9753
9754                 trace_printk_seq(&iter.seq);
9755         }
9756
9757         if (!cnt)
9758                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9759         else
9760                 printk(KERN_TRACE "---------------------------------\n");
9761
9762  out_enable:
9763         tr->trace_flags |= old_userobj;
9764
9765         for_each_tracing_cpu(cpu) {
9766                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9767         }
9768         atomic_dec(&dump_running);
9769         printk_nmi_direct_exit();
9770         local_irq_restore(flags);
9771 }
9772 EXPORT_SYMBOL_GPL(ftrace_dump);
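
/*
 * Illustrative sketch only (my_driver_fatal_error() is hypothetical, not
 * part of this file): since ftrace_dump() is exported, a module may dump
 * the trace buffers when it detects an unrecoverable condition:
 *
 *	static void my_driver_fatal_error(struct device *dev)
 *	{
 *		dev_err(dev, "fatal error, dumping ftrace buffers\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 */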
9773
9774 #define WRITE_BUFSIZE  4096
9775
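/**
 * trace_parse_run_command - parse newline-separated commands and run them
 * @file:	file the commands are written to (standard write signature)
 * @buffer:	user-space buffer holding the commands
 * @count:	number of bytes available in @buffer
 * @ppos:	file position (standard write signature)
 * @createfn:	callback invoked once for each parsed command line
 *
 * Copies the user buffer in chunks of at most WRITE_BUFSIZE, splits it on
 * newlines, strips anything after a '#' comment character, and passes each
 * resulting command string to @createfn. Returns the number of bytes
 * consumed on success or a negative error code.
 *
 * Illustrative sketch only (my_create_cmd() and my_write() are hypothetical,
 * not part of this file): a tracefs write handler could be built on top of
 * this helper like so:
 *
 *	static int my_create_cmd(const char *cmd)
 *	{
 *		pr_info("parsed command: %s\n", cmd);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, cnt, ppos,
 *					       my_create_cmd);
 *	}
 */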
9776 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9777                                 size_t count, loff_t *ppos,
9778                                 int (*createfn)(const char *))
9779 {
9780         char *kbuf, *buf, *tmp;
9781         int ret = 0;
9782         size_t done = 0;
9783         size_t size;
9784
9785         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9786         if (!kbuf)
9787                 return -ENOMEM;
9788
9789         while (done < count) {
9790                 size = count - done;
9791
9792                 if (size >= WRITE_BUFSIZE)
9793                         size = WRITE_BUFSIZE - 1;
9794
9795                 if (copy_from_user(kbuf, buffer + done, size)) {
9796                         ret = -EFAULT;
9797                         goto out;
9798                 }
9799                 kbuf[size] = '\0';
9800                 buf = kbuf;
9801                 do {
9802                         tmp = strchr(buf, '\n');
9803                         if (tmp) {
9804                                 *tmp = '\0';
9805                                 size = tmp - buf + 1;
9806                         } else {
9807                                 size = strlen(buf);
9808                                 if (done + size < count) {
9809                                         if (buf != kbuf)
9810                                                 break;
9811                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9812                                         pr_warn("Line is too long: should be less than %d\n",
9813                                                 WRITE_BUFSIZE - 2);
9814                                         ret = -EINVAL;
9815                                         goto out;
9816                                 }
9817                         }
9818                         done += size;
9819
9820                         /* Remove comments */
9821                         tmp = strchr(buf, '#');
9822
9823                         if (tmp)
9824                                 *tmp = '\0';
9825
9826                         ret = createfn(buf);
9827                         if (ret)
9828                                 goto out;
9829                         buf += size;
9830
9831                 } while (done < count);
9832         }
9833         ret = done;
9834
9835 out:
9836         kfree(kbuf);
9837
9838         return ret;
9839 }
9840
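/*
 * Allocate and wire up the global trace instance: cpumasks, the ring
 * buffer (kept at its minimum size until tracing is actually used),
 * the saved-cmdlines buffer, the nop tracer, and the panic/die
 * notifiers. Called from early_trace_init() during boot.
 */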
9841 __init static int tracer_alloc_buffers(void)
9842 {
9843         int ring_buf_size;
9844         int ret = -ENOMEM;
9845
9846
9847         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9848                 pr_warn("Tracing disabled due to lockdown\n");
9849                 return -EPERM;
9850         }
9851
9852         /*
9853          * Make sure we don't accidentally add more trace options
9854          * than we have bits for.
9855          */
9856         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9857
9858         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9859                 goto out;
9860
9861         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9862                 goto out_free_buffer_mask;
9863
9864         /* Only allocate trace_printk buffers if a trace_printk exists */
9865         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9866                 /* Must be called before global_trace.buffer is allocated */
9867                 trace_printk_init_buffers();
9868
9869         /* To save memory, keep the ring buffer size at its minimum */
9870         if (ring_buffer_expanded)
9871                 ring_buf_size = trace_buf_size;
9872         else
9873                 ring_buf_size = 1;
9874
9875         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9876         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9877
9878         raw_spin_lock_init(&global_trace.start_lock);
9879
9880         /*
9881          * The prepare callback allocates some memory for the ring buffer. We
9882          * don't free the buffer if the CPU goes down. If we were to free
9883          * the buffer, then the user would lose any trace that was in the
9884          * buffer. The memory will be removed once the "instance" is removed.
9885          */
9886         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9887                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9888                                       NULL);
9889         if (ret < 0)
9890                 goto out_free_cpumask;
9891         /* Used for event triggers */
9892         ret = -ENOMEM;
9893         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9894         if (!temp_buffer)
9895                 goto out_rm_hp_state;
9896
9897         if (trace_create_savedcmd() < 0)
9898                 goto out_free_temp_buffer;
9899
9900         /* TODO: make the number of buffers hot pluggable with CPUs */
9901         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9902                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9903                 goto out_free_savedcmd;
9904         }
9905
9906         if (global_trace.buffer_disabled)
9907                 tracing_off();
9908
9909         if (trace_boot_clock) {
9910                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9911                 if (ret < 0)
9912                         pr_warn("Trace clock %s not defined, going back to default\n",
9913                                 trace_boot_clock);
9914         }
9915
9916         /*
9917          * register_tracer() might reference current_trace, so it
9918          * needs to be set before we register anything. This is
9919          * just a bootstrap of current_trace anyway.
9920          */
9921         global_trace.current_trace = &nop_trace;
9922
9923         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9924
9925         ftrace_init_global_array_ops(&global_trace);
9926
9927         init_trace_flags_index(&global_trace);
9928
9929         register_tracer(&nop_trace);
9930
9931         /* Function tracing may start here (via kernel command line) */
9932         init_function_trace();
9933
9934         /* All seems OK, enable tracing */
9935         tracing_disabled = 0;
9936
9937         atomic_notifier_chain_register(&panic_notifier_list,
9938                                        &trace_panic_notifier);
9939
9940         register_die_notifier(&trace_die_notifier);
9941
9942         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9943
9944         INIT_LIST_HEAD(&global_trace.systems);
9945         INIT_LIST_HEAD(&global_trace.events);
9946         INIT_LIST_HEAD(&global_trace.hist_vars);
9947         INIT_LIST_HEAD(&global_trace.err_log);
9948         list_add(&global_trace.list, &ftrace_trace_arrays);
9949
9950         apply_trace_boot_options();
9951
9952         register_snapshot_cmd();
9953
9954         test_can_verify();
9955
9956         return 0;
9957
9958 out_free_savedcmd:
9959         free_saved_cmdlines_buffer(savedcmd);
9960 out_free_temp_buffer:
9961         ring_buffer_free(temp_buffer);
9962 out_rm_hp_state:
9963         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9964 out_free_cpumask:
9965         free_cpumask_var(global_trace.tracing_cpumask);
9966 out_free_buffer_mask:
9967         free_cpumask_var(tracing_buffer_mask);
9968 out:
9969         return ret;
9970 }
9971
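/*
 * Called early from start_kernel() so that trace_printk() and the ring
 * buffer are usable as soon as possible during boot; trace_init() below
 * finishes the job once the event infrastructure can be initialized.
 */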
9972 void __init early_trace_init(void)
9973 {
9974         if (tracepoint_printk) {
9975                 tracepoint_print_iter =
9976                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9977                 if (MEM_FAIL(!tracepoint_print_iter,
9978                              "Failed to allocate trace iterator\n"))
9979                         tracepoint_printk = 0;
9980                 else
9981                         static_key_enable(&tracepoint_printk_key.key);
9982         }
9983         tracer_alloc_buffers();
9984 }
9985
9986 void __init trace_init(void)
9987 {
9988         trace_event_init();
9989 }
9990
9991 __init static int clear_boot_tracer(void)
9992 {
9993         /*
9994          * The buffer holding the default boot-up tracer name is in an
9995          * init section. This function runs as a late initcall; if the
9996          * boot tracer has not been registered by now, clear the pointer
9997          * so that a later registration cannot access memory that is
9998          * about to be freed.
9999          */
10000         if (!default_bootup_tracer)
10001                 return 0;
10002
10003         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10004                default_bootup_tracer);
10005         default_bootup_tracer = NULL;
10006
10007         return 0;
10008 }
10009
10010 fs_initcall(tracer_init_tracefs);
10011 late_initcall_sync(clear_boot_tracer);
10012
10013 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
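/*
 * When sched_clock() turns out to be unstable, per-CPU "local" timestamps
 * cannot be reliably compared across CPUs, so switch the default tracing
 * clock to the slower but cross-CPU-consistent "global" clock unless the
 * user asked for a specific clock on the command line.
 */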
10014 __init static int tracing_set_default_clock(void)
10015 {
10016         /* sched_clock_stable() is determined in late_initcall */
10017         if (!trace_boot_clock && !sched_clock_stable()) {
10018                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10019                         pr_warn("Can not set tracing clock due to lockdown\n");
10020                         return -EPERM;
10021                 }
10022
10023                 printk(KERN_WARNING
10024                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10025                        "If you want to keep using the local clock, then add:\n"
10026                        "  \"trace_clock=local\"\n"
10027                        "on the kernel command line\n");
10028                 tracing_set_clock(&global_trace, "global");
10029         }
10030
10031         return 0;
10032 }
10033 late_initcall_sync(tracing_set_default_clock);
10034 #endif