GNU Linux-libre 6.8.9-gnu
[releases.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will lurk into the ring-buffer to count the
62  * entries inserted during the selftest although some concurrent
63  * insertions into the ring-buffer such as trace_printk could occurred
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73
74 void __init disable_tracing_selftest(const char *reason)
75 {
76         if (!tracing_selftest_disabled) {
77                 tracing_selftest_disabled = true;
78                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
79         }
80 }
81 #else
82 #define tracing_selftest_running        0
83 #define tracing_selftest_disabled       0
84 #endif
85
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputing it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188
189 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
190 static int boot_instance_index;
191
192 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_snapshot_index;
194
195 static int __init set_cmdline_ftrace(char *str)
196 {
197         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
198         default_bootup_tracer = bootup_tracer_buf;
199         /* We are using ftrace early, expand it */
200         trace_set_ring_buffer_expanded(NULL);
201         return 1;
202 }
203 __setup("ftrace=", set_cmdline_ftrace);
204
205 static int __init set_ftrace_dump_on_oops(char *str)
206 {
207         if (*str++ != '=' || !*str || !strcmp("1", str)) {
208                 ftrace_dump_on_oops = DUMP_ALL;
209                 return 1;
210         }
211
212         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
213                 ftrace_dump_on_oops = DUMP_ORIG;
214                 return 1;
215         }
216
217         return 0;
218 }
219 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
220
221 static int __init stop_trace_on_warning(char *str)
222 {
223         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
224                 __disable_trace_on_warning = 1;
225         return 1;
226 }
227 __setup("traceoff_on_warning", stop_trace_on_warning);
228
229 static int __init boot_alloc_snapshot(char *str)
230 {
231         char *slot = boot_snapshot_info + boot_snapshot_index;
232         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
233         int ret;
234
235         if (str[0] == '=') {
236                 str++;
237                 if (strlen(str) >= left)
238                         return -1;
239
240                 ret = snprintf(slot, left, "%s\t", str);
241                 boot_snapshot_index += ret;
242         } else {
243                 allocate_snapshot = true;
244                 /* We also need the main ring buffer expanded */
245                 trace_set_ring_buffer_expanded(NULL);
246         }
247         return 1;
248 }
249 __setup("alloc_snapshot", boot_alloc_snapshot);
250
251
252 static int __init boot_snapshot(char *str)
253 {
254         snapshot_at_boot = true;
255         boot_alloc_snapshot(str);
256         return 1;
257 }
258 __setup("ftrace_boot_snapshot", boot_snapshot);
259
260
261 static int __init boot_instance(char *str)
262 {
263         char *slot = boot_instance_info + boot_instance_index;
264         int left = sizeof(boot_instance_info) - boot_instance_index;
265         int ret;
266
267         if (strlen(str) >= left)
268                 return -1;
269
270         ret = snprintf(slot, left, "%s\t", str);
271         boot_instance_index += ret;
272
273         return 1;
274 }
275 __setup("trace_instance=", boot_instance);
276
277
278 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
279
280 static int __init set_trace_boot_options(char *str)
281 {
282         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
283         return 1;
284 }
285 __setup("trace_options=", set_trace_boot_options);
286
287 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
288 static char *trace_boot_clock __initdata;
289
290 static int __init set_trace_boot_clock(char *str)
291 {
292         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
293         trace_boot_clock = trace_boot_clock_buf;
294         return 1;
295 }
296 __setup("trace_clock=", set_trace_boot_clock);
297
298 static int __init set_tracepoint_printk(char *str)
299 {
300         /* Ignore the "tp_printk_stop_on_boot" param */
301         if (*str == '_')
302                 return 0;
303
304         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
305                 tracepoint_printk = 1;
306         return 1;
307 }
308 __setup("tp_printk", set_tracepoint_printk);
309
310 static int __init set_tracepoint_printk_stop(char *str)
311 {
312         tracepoint_printk_stop_on_boot = true;
313         return 1;
314 }
315 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
316
317 unsigned long long ns2usecs(u64 nsec)
318 {
319         nsec += 500;
320         do_div(nsec, 1000);
321         return nsec;
322 }
323
324 static void
325 trace_process_export(struct trace_export *export,
326                struct ring_buffer_event *event, int flag)
327 {
328         struct trace_entry *entry;
329         unsigned int size = 0;
330
331         if (export->flags & flag) {
332                 entry = ring_buffer_event_data(event);
333                 size = ring_buffer_event_length(event);
334                 export->write(export, entry, size);
335         }
336 }
337
338 static DEFINE_MUTEX(ftrace_export_lock);
339
340 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
341
342 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
344 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
345
346 static inline void ftrace_exports_enable(struct trace_export *export)
347 {
348         if (export->flags & TRACE_EXPORT_FUNCTION)
349                 static_branch_inc(&trace_function_exports_enabled);
350
351         if (export->flags & TRACE_EXPORT_EVENT)
352                 static_branch_inc(&trace_event_exports_enabled);
353
354         if (export->flags & TRACE_EXPORT_MARKER)
355                 static_branch_inc(&trace_marker_exports_enabled);
356 }
357
358 static inline void ftrace_exports_disable(struct trace_export *export)
359 {
360         if (export->flags & TRACE_EXPORT_FUNCTION)
361                 static_branch_dec(&trace_function_exports_enabled);
362
363         if (export->flags & TRACE_EXPORT_EVENT)
364                 static_branch_dec(&trace_event_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_MARKER)
367                 static_branch_dec(&trace_marker_exports_enabled);
368 }
369
370 static void ftrace_exports(struct ring_buffer_event *event, int flag)
371 {
372         struct trace_export *export;
373
374         preempt_disable_notrace();
375
376         export = rcu_dereference_raw_check(ftrace_exports_list);
377         while (export) {
378                 trace_process_export(export, event, flag);
379                 export = rcu_dereference_raw_check(export->next);
380         }
381
382         preempt_enable_notrace();
383 }
384
385 static inline void
386 add_trace_export(struct trace_export **list, struct trace_export *export)
387 {
388         rcu_assign_pointer(export->next, *list);
389         /*
390          * We are entering export into the list but another
391          * CPU might be walking that list. We need to make sure
392          * the export->next pointer is valid before another CPU sees
393          * the export pointer included into the list.
394          */
395         rcu_assign_pointer(*list, export);
396 }
397
398 static inline int
399 rm_trace_export(struct trace_export **list, struct trace_export *export)
400 {
401         struct trace_export **p;
402
403         for (p = list; *p != NULL; p = &(*p)->next)
404                 if (*p == export)
405                         break;
406
407         if (*p != export)
408                 return -1;
409
410         rcu_assign_pointer(*p, (*p)->next);
411
412         return 0;
413 }
414
415 static inline void
416 add_ftrace_export(struct trace_export **list, struct trace_export *export)
417 {
418         ftrace_exports_enable(export);
419
420         add_trace_export(list, export);
421 }
422
423 static inline int
424 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
425 {
426         int ret;
427
428         ret = rm_trace_export(list, export);
429         ftrace_exports_disable(export);
430
431         return ret;
432 }
433
434 int register_ftrace_export(struct trace_export *export)
435 {
436         if (WARN_ON_ONCE(!export->write))
437                 return -1;
438
439         mutex_lock(&ftrace_export_lock);
440
441         add_ftrace_export(&ftrace_exports_list, export);
442
443         mutex_unlock(&ftrace_export_lock);
444
445         return 0;
446 }
447 EXPORT_SYMBOL_GPL(register_ftrace_export);
448
449 int unregister_ftrace_export(struct trace_export *export)
450 {
451         int ret;
452
453         mutex_lock(&ftrace_export_lock);
454
455         ret = rm_ftrace_export(&ftrace_exports_list, export);
456
457         mutex_unlock(&ftrace_export_lock);
458
459         return ret;
460 }
461 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
462
463 /* trace_flags holds trace_options default values */
464 #define TRACE_DEFAULT_FLAGS                                             \
465         (FUNCTION_DEFAULT_FLAGS |                                       \
466          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
467          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
468          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
469          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
470          TRACE_ITER_HASH_PTR)
471
472 /* trace_options that are only supported by global_trace */
473 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
474                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
475
476 /* trace_flags that are default zero for instances */
477 #define ZEROED_TRACE_FLAGS \
478         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
479
480 /*
481  * The global_trace is the descriptor that holds the top-level tracing
482  * buffers for the live tracing.
483  */
484 static struct trace_array global_trace = {
485         .trace_flags = TRACE_DEFAULT_FLAGS,
486 };
487
488 void trace_set_ring_buffer_expanded(struct trace_array *tr)
489 {
490         if (!tr)
491                 tr = &global_trace;
492         tr->ring_buffer_expanded = true;
493 }
494
495 LIST_HEAD(ftrace_trace_arrays);
496
497 int trace_array_get(struct trace_array *this_tr)
498 {
499         struct trace_array *tr;
500         int ret = -ENODEV;
501
502         mutex_lock(&trace_types_lock);
503         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504                 if (tr == this_tr) {
505                         tr->ref++;
506                         ret = 0;
507                         break;
508                 }
509         }
510         mutex_unlock(&trace_types_lock);
511
512         return ret;
513 }
514
515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517         WARN_ON(!this_tr->ref);
518         this_tr->ref--;
519 }
520
521 /**
522  * trace_array_put - Decrement the reference counter for this trace array.
523  * @this_tr : pointer to the trace array
524  *
525  * NOTE: Use this when we no longer need the trace array returned by
526  * trace_array_get_by_name(). This ensures the trace array can be later
527  * destroyed.
528  *
529  */
530 void trace_array_put(struct trace_array *this_tr)
531 {
532         if (!this_tr)
533                 return;
534
535         mutex_lock(&trace_types_lock);
536         __trace_array_put(this_tr);
537         mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
540
541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543         int ret;
544
545         ret = security_locked_down(LOCKDOWN_TRACEFS);
546         if (ret)
547                 return ret;
548
549         if (tracing_disabled)
550                 return -ENODEV;
551
552         if (tr && trace_array_get(tr) < 0)
553                 return -ENODEV;
554
555         return 0;
556 }
557
558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559                               struct trace_buffer *buffer,
560                               struct ring_buffer_event *event)
561 {
562         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563             !filter_match_preds(call->filter, rec)) {
564                 __trace_event_discard_commit(buffer, event);
565                 return 1;
566         }
567
568         return 0;
569 }
570
571 /**
572  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573  * @filtered_pids: The list of pids to check
574  * @search_pid: The PID to find in @filtered_pids
575  *
576  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577  */
578 bool
579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581         return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583
584 /**
585  * trace_ignore_this_task - should a task be ignored for tracing
586  * @filtered_pids: The list of pids to check
587  * @filtered_no_pids: The list of pids not to be traced
588  * @task: The task that should be ignored if not filtered
589  *
590  * Checks if @task should be traced or not from @filtered_pids.
591  * Returns true if @task should *NOT* be traced.
592  * Returns false if @task should be traced.
593  */
594 bool
595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596                        struct trace_pid_list *filtered_no_pids,
597                        struct task_struct *task)
598 {
599         /*
600          * If filtered_no_pids is not empty, and the task's pid is listed
601          * in filtered_no_pids, then return true.
602          * Otherwise, if filtered_pids is empty, that means we can
603          * trace all tasks. If it has content, then only trace pids
604          * within filtered_pids.
605          */
606
607         return (filtered_pids &&
608                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
609                 (filtered_no_pids &&
610                  trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
612
613 /**
614  * trace_filter_add_remove_task - Add or remove a task from a pid_list
615  * @pid_list: The list to modify
616  * @self: The current task for fork or NULL for exit
617  * @task: The task to add or remove
618  *
619  * If adding a task, if @self is defined, the task is only added if @self
620  * is also included in @pid_list. This happens on fork and tasks should
621  * only be added when the parent is listed. If @self is NULL, then the
622  * @task pid will be removed from the list, which would happen on exit
623  * of a task.
624  */
625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626                                   struct task_struct *self,
627                                   struct task_struct *task)
628 {
629         if (!pid_list)
630                 return;
631
632         /* For forks, we only add if the forking task is listed */
633         if (self) {
634                 if (!trace_find_filtered_pid(pid_list, self->pid))
635                         return;
636         }
637
638         /* "self" is set for forks, and NULL for exits */
639         if (self)
640                 trace_pid_list_set(pid_list, task->pid);
641         else
642                 trace_pid_list_clear(pid_list, task->pid);
643 }
644
645 /**
646  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647  * @pid_list: The pid list to show
648  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
649  * @pos: The position of the file
650  *
651  * This is used by the seq_file "next" operation to iterate the pids
652  * listed in a trace_pid_list structure.
653  *
654  * Returns the pid+1 as we want to display pid of zero, but NULL would
655  * stop the iteration.
656  */
657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659         long pid = (unsigned long)v;
660         unsigned int next;
661
662         (*pos)++;
663
664         /* pid already is +1 of the actual previous bit */
665         if (trace_pid_list_next(pid_list, pid, &next) < 0)
666                 return NULL;
667
668         pid = next;
669
670         /* Return pid + 1 to allow zero to be represented */
671         return (void *)(pid + 1);
672 }
673
674 /**
675  * trace_pid_start - Used for seq_file to start reading pid lists
676  * @pid_list: The pid list to show
677  * @pos: The position of the file
678  *
679  * This is used by seq_file "start" operation to start the iteration
680  * of listing pids.
681  *
682  * Returns the pid+1 as we want to display pid of zero, but NULL would
683  * stop the iteration.
684  */
685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687         unsigned long pid;
688         unsigned int first;
689         loff_t l = 0;
690
691         if (trace_pid_list_first(pid_list, &first) < 0)
692                 return NULL;
693
694         pid = first;
695
696         /* Return pid + 1 so that zero can be the exit value */
697         for (pid++; pid && l < *pos;
698              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699                 ;
700         return (void *)pid;
701 }
702
703 /**
704  * trace_pid_show - show the current pid in seq_file processing
705  * @m: The seq_file structure to write into
706  * @v: A void pointer of the pid (+1) value to display
707  *
708  * Can be directly used by seq_file operations to display the current
709  * pid value.
710  */
711 int trace_pid_show(struct seq_file *m, void *v)
712 {
713         unsigned long pid = (unsigned long)v - 1;
714
715         seq_printf(m, "%lu\n", pid);
716         return 0;
717 }
718
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE            127
721
722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723                     struct trace_pid_list **new_pid_list,
724                     const char __user *ubuf, size_t cnt)
725 {
726         struct trace_pid_list *pid_list;
727         struct trace_parser parser;
728         unsigned long val;
729         int nr_pids = 0;
730         ssize_t read = 0;
731         ssize_t ret;
732         loff_t pos;
733         pid_t pid;
734
735         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736                 return -ENOMEM;
737
738         /*
739          * Always recreate a new array. The write is an all or nothing
740          * operation. Always create a new array when adding new pids by
741          * the user. If the operation fails, then the current list is
742          * not modified.
743          */
744         pid_list = trace_pid_list_alloc();
745         if (!pid_list) {
746                 trace_parser_put(&parser);
747                 return -ENOMEM;
748         }
749
750         if (filtered_pids) {
751                 /* copy the current bits to the new max */
752                 ret = trace_pid_list_first(filtered_pids, &pid);
753                 while (!ret) {
754                         trace_pid_list_set(pid_list, pid);
755                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756                         nr_pids++;
757                 }
758         }
759
760         ret = 0;
761         while (cnt > 0) {
762
763                 pos = 0;
764
765                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
766                 if (ret < 0)
767                         break;
768
769                 read += ret;
770                 ubuf += ret;
771                 cnt -= ret;
772
773                 if (!trace_parser_loaded(&parser))
774                         break;
775
776                 ret = -EINVAL;
777                 if (kstrtoul(parser.buffer, 0, &val))
778                         break;
779
780                 pid = (pid_t)val;
781
782                 if (trace_pid_list_set(pid_list, pid) < 0) {
783                         ret = -1;
784                         break;
785                 }
786                 nr_pids++;
787
788                 trace_parser_clear(&parser);
789                 ret = 0;
790         }
791         trace_parser_put(&parser);
792
793         if (ret < 0) {
794                 trace_pid_list_free(pid_list);
795                 return ret;
796         }
797
798         if (!nr_pids) {
799                 /* Cleared the list of pids */
800                 trace_pid_list_free(pid_list);
801                 pid_list = NULL;
802         }
803
804         *new_pid_list = pid_list;
805
806         return read;
807 }
808
809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811         u64 ts;
812
813         /* Early boot up does not have a buffer yet */
814         if (!buf->buffer)
815                 return trace_clock_local();
816
817         ts = ring_buffer_time_stamp(buf->buffer);
818         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819
820         return ts;
821 }
822
823 u64 ftrace_now(int cpu)
824 {
825         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827
828 /**
829  * tracing_is_enabled - Show if global_trace has been enabled
830  *
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled" to be used in fast paths such as for
833  * the irqsoff tracer. But it may be inaccurate due to races. If you
834  * need to know the accurate state, use tracing_is_on() which is a little
835  * slower, but accurate.
836  */
837 int tracing_is_enabled(void)
838 {
839         /*
840          * For quick access (irqsoff uses this in fast path), just
841          * return the mirror variable of the state of the ring buffer.
842          * It's a little racy, but we don't really care.
843          */
844         smp_rmb();
845         return !global_trace.buffer_disabled;
846 }
847
848 /*
849  * trace_buf_size is the size in bytes that is allocated
850  * for a buffer. Note, the number of bytes is always rounded
851  * to page size.
852  *
853  * This number is purposely set to a low number of 16384.
854  * If the dump on oops happens, it will be much appreciated
855  * to not have to wait for all that output. Anyway this can be
856  * boot time and run time configurable.
857  */
858 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
859
860 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer            *trace_types __read_mostly;
864
865 /*
866  * trace_types_lock is used to protect the trace_types list.
867  */
868 DEFINE_MUTEX(trace_types_lock);
869
870 /*
871  * serialize the access of the ring buffer
872  *
873  * ring buffer serializes readers, but it is low level protection.
874  * The validity of the events (which returns by ring_buffer_peek() ..etc)
875  * are not protected by ring buffer.
876  *
877  * The content of events may become garbage if we allow other process consumes
878  * these events concurrently:
879  *   A) the page of the consumed events may become a normal page
880  *      (not reader page) in ring buffer, and this page will be rewritten
881  *      by events producer.
882  *   B) The page of the consumed events may become a page for splice_read,
883  *      and this page will be returned to system.
884  *
885  * These primitives allow multi process access to different cpu ring buffer
886  * concurrently.
887  *
888  * These primitives don't distinguish read-only and read-consume access.
889  * Multi read-only access are also serialized.
890  */
891
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895
896 static inline void trace_access_lock(int cpu)
897 {
898         if (cpu == RING_BUFFER_ALL_CPUS) {
899                 /* gain it for accessing the whole ring buffer. */
900                 down_write(&all_cpu_access_lock);
901         } else {
902                 /* gain it for accessing a cpu ring buffer. */
903
904                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905                 down_read(&all_cpu_access_lock);
906
907                 /* Secondly block other access to this @cpu ring buffer. */
908                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
909         }
910 }
911
912 static inline void trace_access_unlock(int cpu)
913 {
914         if (cpu == RING_BUFFER_ALL_CPUS) {
915                 up_write(&all_cpu_access_lock);
916         } else {
917                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918                 up_read(&all_cpu_access_lock);
919         }
920 }
921
922 static inline void trace_access_lock_init(void)
923 {
924         int cpu;
925
926         for_each_possible_cpu(cpu)
927                 mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929
930 #else
931
932 static DEFINE_MUTEX(access_lock);
933
934 static inline void trace_access_lock(int cpu)
935 {
936         (void)cpu;
937         mutex_lock(&access_lock);
938 }
939
940 static inline void trace_access_unlock(int cpu)
941 {
942         (void)cpu;
943         mutex_unlock(&access_lock);
944 }
945
946 static inline void trace_access_lock_init(void)
947 {
948 }
949
950 #endif
951
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954                                  unsigned int trace_ctx,
955                                  int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957                                       struct trace_buffer *buffer,
958                                       unsigned int trace_ctx,
959                                       int skip, struct pt_regs *regs);
960
961 #else
962 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
963                                         unsigned int trace_ctx,
964                                         int skip, struct pt_regs *regs)
965 {
966 }
967 static inline void ftrace_trace_stack(struct trace_array *tr,
968                                       struct trace_buffer *buffer,
969                                       unsigned long trace_ctx,
970                                       int skip, struct pt_regs *regs)
971 {
972 }
973
974 #endif
975
976 static __always_inline void
977 trace_event_setup(struct ring_buffer_event *event,
978                   int type, unsigned int trace_ctx)
979 {
980         struct trace_entry *ent = ring_buffer_event_data(event);
981
982         tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984
985 static __always_inline struct ring_buffer_event *
986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987                           int type,
988                           unsigned long len,
989                           unsigned int trace_ctx)
990 {
991         struct ring_buffer_event *event;
992
993         event = ring_buffer_lock_reserve(buffer, len);
994         if (event != NULL)
995                 trace_event_setup(event, type, trace_ctx);
996
997         return event;
998 }
999
1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002         if (tr->array_buffer.buffer)
1003                 ring_buffer_record_on(tr->array_buffer.buffer);
1004         /*
1005          * This flag is looked at when buffers haven't been allocated
1006          * yet, or by some tracers (like irqsoff), that just want to
1007          * know if the ring buffer has been disabled, but it can handle
1008          * races of where it gets disabled but we still do a record.
1009          * As the check is in the fast path of the tracers, it is more
1010          * important to be fast than accurate.
1011          */
1012         tr->buffer_disabled = 0;
1013         /* Make the flag seen by readers */
1014         smp_wmb();
1015 }
1016
1017 /**
1018  * tracing_on - enable tracing buffers
1019  *
1020  * This function enables tracing buffers that may have been
1021  * disabled with tracing_off.
1022  */
1023 void tracing_on(void)
1024 {
1025         tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028
1029
1030 static __always_inline void
1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033         __this_cpu_write(trace_taskinfo_save, true);
1034
1035         /* If this is the temp buffer, we need to commit fully */
1036         if (this_cpu_read(trace_buffered_event) == event) {
1037                 /* Length is in event->array[0] */
1038                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039                 /* Release the temp buffer */
1040                 this_cpu_dec(trace_buffered_event_cnt);
1041                 /* ring_buffer_unlock_commit() enables preemption */
1042                 preempt_enable_notrace();
1043         } else
1044                 ring_buffer_unlock_commit(buffer);
1045 }
1046
1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048                        const char *str, int size)
1049 {
1050         struct ring_buffer_event *event;
1051         struct trace_buffer *buffer;
1052         struct print_entry *entry;
1053         unsigned int trace_ctx;
1054         int alloc;
1055
1056         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057                 return 0;
1058
1059         if (unlikely(tracing_selftest_running && tr == &global_trace))
1060                 return 0;
1061
1062         if (unlikely(tracing_disabled))
1063                 return 0;
1064
1065         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066
1067         trace_ctx = tracing_gen_ctx();
1068         buffer = tr->array_buffer.buffer;
1069         ring_buffer_nest_start(buffer);
1070         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071                                             trace_ctx);
1072         if (!event) {
1073                 size = 0;
1074                 goto out;
1075         }
1076
1077         entry = ring_buffer_event_data(event);
1078         entry->ip = ip;
1079
1080         memcpy(&entry->buf, str, size);
1081
1082         /* Add a newline if necessary */
1083         if (entry->buf[size - 1] != '\n') {
1084                 entry->buf[size] = '\n';
1085                 entry->buf[size + 1] = '\0';
1086         } else
1087                 entry->buf[size] = '\0';
1088
1089         __buffer_unlock_commit(buffer, event);
1090         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096
1097 /**
1098  * __trace_puts - write a constant string into the trace buffer.
1099  * @ip:    The address of the caller
1100  * @str:   The constant string to write
1101  * @size:  The size of the string.
1102  */
1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105         return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
1108
1109 /**
1110  * __trace_bputs - write the pointer to a constant string into trace buffer
1111  * @ip:    The address of the caller
1112  * @str:   The constant string to write to the buffer to
1113  */
1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116         struct ring_buffer_event *event;
1117         struct trace_buffer *buffer;
1118         struct bputs_entry *entry;
1119         unsigned int trace_ctx;
1120         int size = sizeof(struct bputs_entry);
1121         int ret = 0;
1122
1123         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124                 return 0;
1125
1126         if (unlikely(tracing_selftest_running || tracing_disabled))
1127                 return 0;
1128
1129         trace_ctx = tracing_gen_ctx();
1130         buffer = global_trace.array_buffer.buffer;
1131
1132         ring_buffer_nest_start(buffer);
1133         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134                                             trace_ctx);
1135         if (!event)
1136                 goto out;
1137
1138         entry = ring_buffer_event_data(event);
1139         entry->ip                       = ip;
1140         entry->str                      = str;
1141
1142         __buffer_unlock_commit(buffer, event);
1143         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144
1145         ret = 1;
1146  out:
1147         ring_buffer_nest_end(buffer);
1148         return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
1151
1152 #ifdef CONFIG_TRACER_SNAPSHOT
1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154                                            void *cond_data)
1155 {
1156         struct tracer *tracer = tr->current_trace;
1157         unsigned long flags;
1158
1159         if (in_nmi()) {
1160                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1162                 return;
1163         }
1164
1165         if (!tr->allocated_snapshot) {
1166                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1168                 tracer_tracing_off(tr);
1169                 return;
1170         }
1171
1172         /* Note, snapshot can not be used when the tracer uses it */
1173         if (tracer->use_max_tr) {
1174                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176                 return;
1177         }
1178
1179         local_irq_save(flags);
1180         update_max_tr(tr, current, smp_processor_id(), cond_data);
1181         local_irq_restore(flags);
1182 }
1183
1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186         tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188
1189 /**
1190  * tracing_snapshot - take a snapshot of the current buffer.
1191  *
1192  * This causes a swap between the snapshot buffer and the current live
1193  * tracing buffer. You can use this to take snapshots of the live
1194  * trace when some condition is triggered, but continue to trace.
1195  *
1196  * Note, make sure to allocate the snapshot with either
1197  * a tracing_snapshot_alloc(), or by doing it manually
1198  * with: echo 1 > /sys/kernel/tracing/snapshot
1199  *
1200  * If the snapshot buffer is not allocated, it will stop tracing.
1201  * Basically making a permanent snapshot.
1202  */
1203 void tracing_snapshot(void)
1204 {
1205         struct trace_array *tr = &global_trace;
1206
1207         tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
1210
1211 /**
1212  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1213  * @tr:         The tracing instance to snapshot
1214  * @cond_data:  The data to be tested conditionally, and possibly saved
1215  *
1216  * This is the same as tracing_snapshot() except that the snapshot is
1217  * conditional - the snapshot will only happen if the
1218  * cond_snapshot.update() implementation receiving the cond_data
1219  * returns true, which means that the trace array's cond_snapshot
1220  * update() operation used the cond_data to determine whether the
1221  * snapshot should be taken, and if it was, presumably saved it along
1222  * with the snapshot.
1223  */
1224 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1225 {
1226         tracing_snapshot_instance_cond(tr, cond_data);
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229
1230 /**
1231  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232  * @tr:         The tracing instance
1233  *
1234  * When the user enables a conditional snapshot using
1235  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236  * with the snapshot.  This accessor is used to retrieve it.
1237  *
1238  * Should not be called from cond_snapshot.update(), since it takes
1239  * the tr->max_lock lock, which the code calling
1240  * cond_snapshot.update() has already done.
1241  *
1242  * Returns the cond_data associated with the trace array's snapshot.
1243  */
1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246         void *cond_data = NULL;
1247
1248         local_irq_disable();
1249         arch_spin_lock(&tr->max_lock);
1250
1251         if (tr->cond_snapshot)
1252                 cond_data = tr->cond_snapshot->cond_data;
1253
1254         arch_spin_unlock(&tr->max_lock);
1255         local_irq_enable();
1256
1257         return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260
1261 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1262                                         struct array_buffer *size_buf, int cpu_id);
1263 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264
1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267         int order;
1268         int ret;
1269
1270         if (!tr->allocated_snapshot) {
1271
1272                 /* Make the snapshot buffer have the same order as main buffer */
1273                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1274                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1275                 if (ret < 0)
1276                         return ret;
1277
1278                 /* allocate spare buffer */
1279                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1280                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1281                 if (ret < 0)
1282                         return ret;
1283
1284                 tr->allocated_snapshot = true;
1285         }
1286
1287         return 0;
1288 }
1289
1290 static void free_snapshot(struct trace_array *tr)
1291 {
1292         /*
1293          * We don't free the ring buffer. instead, resize it because
1294          * The max_tr ring buffer has some state (e.g. ring->clock) and
1295          * we want preserve it.
1296          */
1297         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1298         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1299         set_buffer_entries(&tr->max_buffer, 1);
1300         tracing_reset_online_cpus(&tr->max_buffer);
1301         tr->allocated_snapshot = false;
1302 }
1303
1304 /**
1305  * tracing_alloc_snapshot - allocate snapshot buffer.
1306  *
1307  * This only allocates the snapshot buffer if it isn't already
1308  * allocated - it doesn't also take a snapshot.
1309  *
1310  * This is meant to be used in cases where the snapshot buffer needs
1311  * to be set up for events that can't sleep but need to be able to
1312  * trigger a snapshot.
1313  */
1314 int tracing_alloc_snapshot(void)
1315 {
1316         struct trace_array *tr = &global_trace;
1317         int ret;
1318
1319         ret = tracing_alloc_snapshot_instance(tr);
1320         WARN_ON(ret < 0);
1321
1322         return ret;
1323 }
1324 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1325
1326 /**
1327  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1328  *
1329  * This is similar to tracing_snapshot(), but it will allocate the
1330  * snapshot buffer if it isn't already allocated. Use this only
1331  * where it is safe to sleep, as the allocation may sleep.
1332  *
1333  * This causes a swap between the snapshot buffer and the current live
1334  * tracing buffer. You can use this to take snapshots of the live
1335  * trace when some condition is triggered, but continue to trace.
1336  */
1337 void tracing_snapshot_alloc(void)
1338 {
1339         int ret;
1340
1341         ret = tracing_alloc_snapshot();
1342         if (ret < 0)
1343                 return;
1344
1345         tracing_snapshot();
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1348
1349 /**
1350  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  * @cond_data:  User data to associate with the snapshot
1353  * @update:     Implementation of the cond_snapshot update function
1354  *
1355  * Check whether the conditional snapshot for the given instance has
1356  * already been enabled, or if the current tracer is already using a
1357  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1358  * save the cond_data and update function inside.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1363                                  cond_update_fn_t update)
1364 {
1365         struct cond_snapshot *cond_snapshot;
1366         int ret = 0;
1367
1368         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1369         if (!cond_snapshot)
1370                 return -ENOMEM;
1371
1372         cond_snapshot->cond_data = cond_data;
1373         cond_snapshot->update = update;
1374
1375         mutex_lock(&trace_types_lock);
1376
1377         ret = tracing_alloc_snapshot_instance(tr);
1378         if (ret)
1379                 goto fail_unlock;
1380
1381         if (tr->current_trace->use_max_tr) {
1382                 ret = -EBUSY;
1383                 goto fail_unlock;
1384         }
1385
1386         /*
1387          * The cond_snapshot can only change to NULL without the
1388          * trace_types_lock. We don't care if we race with it going
1389          * to NULL, but we want to make sure that it's not set to
1390          * something other than NULL when we get here, which we can
1391          * do safely with only holding the trace_types_lock and not
1392          * having to take the max_lock.
1393          */
1394         if (tr->cond_snapshot) {
1395                 ret = -EBUSY;
1396                 goto fail_unlock;
1397         }
1398
1399         local_irq_disable();
1400         arch_spin_lock(&tr->max_lock);
1401         tr->cond_snapshot = cond_snapshot;
1402         arch_spin_unlock(&tr->max_lock);
1403         local_irq_enable();
1404
1405         mutex_unlock(&trace_types_lock);
1406
1407         return ret;
1408
1409  fail_unlock:
1410         mutex_unlock(&trace_types_lock);
1411         kfree(cond_snapshot);
1412         return ret;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1415
1416 /**
1417  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1418  * @tr:         The tracing instance
1419  *
1420  * Check whether the conditional snapshot for the given instance is
1421  * enabled; if so, free the cond_snapshot associated with it,
1422  * otherwise return -EINVAL.
1423  *
1424  * Returns 0 if successful, error otherwise.
1425  */
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         int ret = 0;
1429
1430         local_irq_disable();
1431         arch_spin_lock(&tr->max_lock);
1432
1433         if (!tr->cond_snapshot)
1434                 ret = -EINVAL;
1435         else {
1436                 kfree(tr->cond_snapshot);
1437                 tr->cond_snapshot = NULL;
1438         }
1439
1440         arch_spin_unlock(&tr->max_lock);
1441         local_irq_enable();
1442
1443         return ret;
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1446 #else
1447 void tracing_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_snapshot);
1452 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1453 {
1454         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1455 }
1456 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1457 int tracing_alloc_snapshot(void)
1458 {
1459         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1460         return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1463 void tracing_snapshot_alloc(void)
1464 {
1465         /* Give warning */
1466         tracing_snapshot();
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1469 void *tracing_cond_snapshot_data(struct trace_array *tr)
1470 {
1471         return NULL;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1474 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1475 {
1476         return -ENODEV;
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1479 int tracing_snapshot_cond_disable(struct trace_array *tr)
1480 {
1481         return false;
1482 }
1483 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1484 #define free_snapshot(tr)       do { } while (0)
1485 #endif /* CONFIG_TRACER_SNAPSHOT */
1486
1487 void tracer_tracing_off(struct trace_array *tr)
1488 {
1489         if (tr->array_buffer.buffer)
1490                 ring_buffer_record_off(tr->array_buffer.buffer);
1491         /*
1492          * This flag is looked at when buffers haven't been allocated
1493          * yet, or by some tracers (like irqsoff), that just want to
1494          * know if the ring buffer has been disabled, but it can handle
1495          * races of where it gets disabled but we still do a record.
1496          * As the check is in the fast path of the tracers, it is more
1497          * important to be fast than accurate.
1498          */
1499         tr->buffer_disabled = 1;
1500         /* Make the flag seen by readers */
1501         smp_wmb();
1502 }
1503
1504 /**
1505  * tracing_off - turn off tracing buffers
1506  *
1507  * This function stops the tracing buffers from recording data.
1508  * It does not disable any overhead the tracers themselves may
1509  * be causing. This function simply causes all recording to
1510  * the ring buffers to fail.
1511  */
1512 void tracing_off(void)
1513 {
1514         tracer_tracing_off(&global_trace);
1515 }
1516 EXPORT_SYMBOL_GPL(tracing_off);
1517
1518 void disable_trace_on_warning(void)
1519 {
1520         if (__disable_trace_on_warning) {
1521                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1522                         "Disabling tracing due to warning\n");
1523                 tracing_off();
1524         }
1525 }
1526
1527 /**
1528  * tracer_tracing_is_on - show real state of ring buffer enabled
1529  * @tr : the trace array to know if ring buffer is enabled
1530  *
1531  * Shows real state of the ring buffer if it is enabled or not.
1532  */
1533 bool tracer_tracing_is_on(struct trace_array *tr)
1534 {
1535         if (tr->array_buffer.buffer)
1536                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1537         return !tr->buffer_disabled;
1538 }
1539
1540 /**
1541  * tracing_is_on - show state of ring buffers enabled
1542  */
1543 int tracing_is_on(void)
1544 {
1545         return tracer_tracing_is_on(&global_trace);
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_is_on);
1548
1549 static int __init set_buf_size(char *str)
1550 {
1551         unsigned long buf_size;
1552
1553         if (!str)
1554                 return 0;
1555         buf_size = memparse(str, &str);
1556         /*
1557          * nr_entries can not be zero and the startup
1558          * tests require some buffer space. Therefore
1559          * ensure we have at least 4096 bytes of buffer.
1560          */
1561         trace_buf_size = max(4096UL, buf_size);
1562         return 1;
1563 }
1564 __setup("trace_buf_size=", set_buf_size);
1565
1566 static int __init set_tracing_thresh(char *str)
1567 {
1568         unsigned long threshold;
1569         int ret;
1570
1571         if (!str)
1572                 return 0;
1573         ret = kstrtoul(str, 0, &threshold);
1574         if (ret < 0)
1575                 return 0;
1576         tracing_thresh = threshold * 1000;
1577         return 1;
1578 }
1579 __setup("tracing_thresh=", set_tracing_thresh);
1580
1581 unsigned long nsecs_to_usecs(unsigned long nsecs)
1582 {
1583         return nsecs / 1000;
1584 }
1585
1586 /*
1587  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1588  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1589  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1590  * of strings in the order that the evals (enum) were defined.
1591  */
1592 #undef C
1593 #define C(a, b) b
1594
1595 /* These must match the bit positions in trace_iterator_flags */
1596 static const char *trace_options[] = {
1597         TRACE_FLAGS
1598         NULL
1599 };
1600
1601 static struct {
1602         u64 (*func)(void);
1603         const char *name;
1604         int in_ns;              /* is this clock in nanoseconds? */
1605 } trace_clocks[] = {
1606         { trace_clock_local,            "local",        1 },
1607         { trace_clock_global,           "global",       1 },
1608         { trace_clock_counter,          "counter",      0 },
1609         { trace_clock_jiffies,          "uptime",       0 },
1610         { trace_clock,                  "perf",         1 },
1611         { ktime_get_mono_fast_ns,       "mono",         1 },
1612         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1613         { ktime_get_boot_fast_ns,       "boot",         1 },
1614         { ktime_get_tai_fast_ns,        "tai",          1 },
1615         ARCH_TRACE_CLOCKS
1616 };
1617
1618 bool trace_clock_in_ns(struct trace_array *tr)
1619 {
1620         if (trace_clocks[tr->clock_id].in_ns)
1621                 return true;
1622
1623         return false;
1624 }
1625
1626 /*
1627  * trace_parser_get_init - gets the buffer for trace parser
1628  */
1629 int trace_parser_get_init(struct trace_parser *parser, int size)
1630 {
1631         memset(parser, 0, sizeof(*parser));
1632
1633         parser->buffer = kmalloc(size, GFP_KERNEL);
1634         if (!parser->buffer)
1635                 return 1;
1636
1637         parser->size = size;
1638         return 0;
1639 }
1640
1641 /*
1642  * trace_parser_put - frees the buffer for trace parser
1643  */
1644 void trace_parser_put(struct trace_parser *parser)
1645 {
1646         kfree(parser->buffer);
1647         parser->buffer = NULL;
1648 }
1649
1650 /*
1651  * trace_get_user - reads the user input string separated by  space
1652  * (matched by isspace(ch))
1653  *
1654  * For each string found the 'struct trace_parser' is updated,
1655  * and the function returns.
1656  *
1657  * Returns number of bytes read.
1658  *
1659  * See kernel/trace/trace.h for 'struct trace_parser' details.
1660  */
1661 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1662         size_t cnt, loff_t *ppos)
1663 {
1664         char ch;
1665         size_t read = 0;
1666         ssize_t ret;
1667
1668         if (!*ppos)
1669                 trace_parser_clear(parser);
1670
1671         ret = get_user(ch, ubuf++);
1672         if (ret)
1673                 goto out;
1674
1675         read++;
1676         cnt--;
1677
1678         /*
1679          * The parser is not finished with the last write,
1680          * continue reading the user input without skipping spaces.
1681          */
1682         if (!parser->cont) {
1683                 /* skip white space */
1684                 while (cnt && isspace(ch)) {
1685                         ret = get_user(ch, ubuf++);
1686                         if (ret)
1687                                 goto out;
1688                         read++;
1689                         cnt--;
1690                 }
1691
1692                 parser->idx = 0;
1693
1694                 /* only spaces were written */
1695                 if (isspace(ch) || !ch) {
1696                         *ppos += read;
1697                         ret = read;
1698                         goto out;
1699                 }
1700         }
1701
1702         /* read the non-space input */
1703         while (cnt && !isspace(ch) && ch) {
1704                 if (parser->idx < parser->size - 1)
1705                         parser->buffer[parser->idx++] = ch;
1706                 else {
1707                         ret = -EINVAL;
1708                         goto out;
1709                 }
1710                 ret = get_user(ch, ubuf++);
1711                 if (ret)
1712                         goto out;
1713                 read++;
1714                 cnt--;
1715         }
1716
1717         /* We either got finished input or we have to wait for another call. */
1718         if (isspace(ch) || !ch) {
1719                 parser->buffer[parser->idx] = 0;
1720                 parser->cont = false;
1721         } else if (parser->idx < parser->size - 1) {
1722                 parser->cont = true;
1723                 parser->buffer[parser->idx++] = ch;
1724                 /* Make sure the parsed string always terminates with '\0'. */
1725                 parser->buffer[parser->idx] = 0;
1726         } else {
1727                 ret = -EINVAL;
1728                 goto out;
1729         }
1730
1731         *ppos += read;
1732         ret = read;
1733
1734 out:
1735         return ret;
1736 }
1737
1738 /* TODO add a seq_buf_to_buffer() */
1739 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1740 {
1741         int len;
1742
1743         if (trace_seq_used(s) <= s->readpos)
1744                 return -EBUSY;
1745
1746         len = trace_seq_used(s) - s->readpos;
1747         if (cnt > len)
1748                 cnt = len;
1749         memcpy(buf, s->buffer + s->readpos, cnt);
1750
1751         s->readpos += cnt;
1752         return cnt;
1753 }
1754
1755 unsigned long __read_mostly     tracing_thresh;
1756
1757 #ifdef CONFIG_TRACER_MAX_TRACE
1758 static const struct file_operations tracing_max_lat_fops;
1759
1760 #ifdef LATENCY_FS_NOTIFY
1761
1762 static struct workqueue_struct *fsnotify_wq;
1763
1764 static void latency_fsnotify_workfn(struct work_struct *work)
1765 {
1766         struct trace_array *tr = container_of(work, struct trace_array,
1767                                               fsnotify_work);
1768         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1769 }
1770
1771 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1772 {
1773         struct trace_array *tr = container_of(iwork, struct trace_array,
1774                                               fsnotify_irqwork);
1775         queue_work(fsnotify_wq, &tr->fsnotify_work);
1776 }
1777
1778 static void trace_create_maxlat_file(struct trace_array *tr,
1779                                      struct dentry *d_tracer)
1780 {
1781         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1782         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1783         tr->d_max_latency = trace_create_file("tracing_max_latency",
1784                                               TRACE_MODE_WRITE,
1785                                               d_tracer, tr,
1786                                               &tracing_max_lat_fops);
1787 }
1788
1789 __init static int latency_fsnotify_init(void)
1790 {
1791         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1792                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1793         if (!fsnotify_wq) {
1794                 pr_err("Unable to allocate tr_max_lat_wq\n");
1795                 return -ENOMEM;
1796         }
1797         return 0;
1798 }
1799
1800 late_initcall_sync(latency_fsnotify_init);
1801
1802 void latency_fsnotify(struct trace_array *tr)
1803 {
1804         if (!fsnotify_wq)
1805                 return;
1806         /*
1807          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1808          * possible that we are called from __schedule() or do_idle(), which
1809          * could cause a deadlock.
1810          */
1811         irq_work_queue(&tr->fsnotify_irqwork);
1812 }
1813
1814 #else /* !LATENCY_FS_NOTIFY */
1815
1816 #define trace_create_maxlat_file(tr, d_tracer)                          \
1817         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1818                           d_tracer, tr, &tracing_max_lat_fops)
1819
1820 #endif
1821
1822 /*
1823  * Copy the new maximum trace into the separate maximum-trace
1824  * structure. (this way the maximum trace is permanently saved,
1825  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1826  */
1827 static void
1828 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1829 {
1830         struct array_buffer *trace_buf = &tr->array_buffer;
1831         struct array_buffer *max_buf = &tr->max_buffer;
1832         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1833         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1834
1835         max_buf->cpu = cpu;
1836         max_buf->time_start = data->preempt_timestamp;
1837
1838         max_data->saved_latency = tr->max_latency;
1839         max_data->critical_start = data->critical_start;
1840         max_data->critical_end = data->critical_end;
1841
1842         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1843         max_data->pid = tsk->pid;
1844         /*
1845          * If tsk == current, then use current_uid(), as that does not use
1846          * RCU. The irq tracer can be called out of RCU scope.
1847          */
1848         if (tsk == current)
1849                 max_data->uid = current_uid();
1850         else
1851                 max_data->uid = task_uid(tsk);
1852
1853         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1854         max_data->policy = tsk->policy;
1855         max_data->rt_priority = tsk->rt_priority;
1856
1857         /* record this tasks comm */
1858         tracing_record_cmdline(tsk);
1859         latency_fsnotify(tr);
1860 }
1861
1862 /**
1863  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1864  * @tr: tracer
1865  * @tsk: the task with the latency
1866  * @cpu: The cpu that initiated the trace.
1867  * @cond_data: User data associated with a conditional snapshot
1868  *
1869  * Flip the buffers between the @tr and the max_tr and record information
1870  * about which task was the cause of this latency.
1871  */
1872 void
1873 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1874               void *cond_data)
1875 {
1876         if (tr->stop_count)
1877                 return;
1878
1879         WARN_ON_ONCE(!irqs_disabled());
1880
1881         if (!tr->allocated_snapshot) {
1882                 /* Only the nop tracer should hit this when disabling */
1883                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1884                 return;
1885         }
1886
1887         arch_spin_lock(&tr->max_lock);
1888
1889         /* Inherit the recordable setting from array_buffer */
1890         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1891                 ring_buffer_record_on(tr->max_buffer.buffer);
1892         else
1893                 ring_buffer_record_off(tr->max_buffer.buffer);
1894
1895 #ifdef CONFIG_TRACER_SNAPSHOT
1896         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1897                 arch_spin_unlock(&tr->max_lock);
1898                 return;
1899         }
1900 #endif
1901         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1902
1903         __update_max_tr(tr, tsk, cpu);
1904
1905         arch_spin_unlock(&tr->max_lock);
1906
1907         /* Any waiters on the old snapshot buffer need to wake up */
1908         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1909 }
1910
1911 /**
1912  * update_max_tr_single - only copy one trace over, and reset the rest
1913  * @tr: tracer
1914  * @tsk: task with the latency
1915  * @cpu: the cpu of the buffer to copy.
1916  *
1917  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1918  */
1919 void
1920 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1921 {
1922         int ret;
1923
1924         if (tr->stop_count)
1925                 return;
1926
1927         WARN_ON_ONCE(!irqs_disabled());
1928         if (!tr->allocated_snapshot) {
1929                 /* Only the nop tracer should hit this when disabling */
1930                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1931                 return;
1932         }
1933
1934         arch_spin_lock(&tr->max_lock);
1935
1936         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1937
1938         if (ret == -EBUSY) {
1939                 /*
1940                  * We failed to swap the buffer due to a commit taking
1941                  * place on this CPU. We fail to record, but we reset
1942                  * the max trace buffer (no one writes directly to it)
1943                  * and flag that it failed.
1944                  * Another reason is resize is in progress.
1945                  */
1946                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1947                         "Failed to swap buffers due to commit or resize in progress\n");
1948         }
1949
1950         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1951
1952         __update_max_tr(tr, tsk, cpu);
1953         arch_spin_unlock(&tr->max_lock);
1954 }
1955
1956 #endif /* CONFIG_TRACER_MAX_TRACE */
1957
1958 struct pipe_wait {
1959         struct trace_iterator           *iter;
1960         int                             wait_index;
1961 };
1962
1963 static bool wait_pipe_cond(void *data)
1964 {
1965         struct pipe_wait *pwait = data;
1966         struct trace_iterator *iter = pwait->iter;
1967
1968         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1969                 return true;
1970
1971         return iter->closed;
1972 }
1973
1974 static int wait_on_pipe(struct trace_iterator *iter, int full)
1975 {
1976         struct pipe_wait pwait;
1977         int ret;
1978
1979         /* Iterators are static, they should be filled or empty */
1980         if (trace_buffer_iter(iter, iter->cpu_file))
1981                 return 0;
1982
1983         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1984         pwait.iter = iter;
1985
1986         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1987                                wait_pipe_cond, &pwait);
1988
1989 #ifdef CONFIG_TRACER_MAX_TRACE
1990         /*
1991          * Make sure this is still the snapshot buffer, as if a snapshot were
1992          * to happen, this would now be the main buffer.
1993          */
1994         if (iter->snapshot)
1995                 iter->array_buffer = &iter->tr->max_buffer;
1996 #endif
1997         return ret;
1998 }
1999
2000 #ifdef CONFIG_FTRACE_STARTUP_TEST
2001 static bool selftests_can_run;
2002
2003 struct trace_selftests {
2004         struct list_head                list;
2005         struct tracer                   *type;
2006 };
2007
2008 static LIST_HEAD(postponed_selftests);
2009
2010 static int save_selftest(struct tracer *type)
2011 {
2012         struct trace_selftests *selftest;
2013
2014         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2015         if (!selftest)
2016                 return -ENOMEM;
2017
2018         selftest->type = type;
2019         list_add(&selftest->list, &postponed_selftests);
2020         return 0;
2021 }
2022
2023 static int run_tracer_selftest(struct tracer *type)
2024 {
2025         struct trace_array *tr = &global_trace;
2026         struct tracer *saved_tracer = tr->current_trace;
2027         int ret;
2028
2029         if (!type->selftest || tracing_selftest_disabled)
2030                 return 0;
2031
2032         /*
2033          * If a tracer registers early in boot up (before scheduling is
2034          * initialized and such), then do not run its selftests yet.
2035          * Instead, run it a little later in the boot process.
2036          */
2037         if (!selftests_can_run)
2038                 return save_selftest(type);
2039
2040         if (!tracing_is_on()) {
2041                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2042                         type->name);
2043                 return 0;
2044         }
2045
2046         /*
2047          * Run a selftest on this tracer.
2048          * Here we reset the trace buffer, and set the current
2049          * tracer to be this tracer. The tracer can then run some
2050          * internal tracing to verify that everything is in order.
2051          * If we fail, we do not register this tracer.
2052          */
2053         tracing_reset_online_cpus(&tr->array_buffer);
2054
2055         tr->current_trace = type;
2056
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058         if (type->use_max_tr) {
2059                 /* If we expanded the buffers, make sure the max is expanded too */
2060                 if (tr->ring_buffer_expanded)
2061                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2062                                            RING_BUFFER_ALL_CPUS);
2063                 tr->allocated_snapshot = true;
2064         }
2065 #endif
2066
2067         /* the test is responsible for initializing and enabling */
2068         pr_info("Testing tracer %s: ", type->name);
2069         ret = type->selftest(type, tr);
2070         /* the test is responsible for resetting too */
2071         tr->current_trace = saved_tracer;
2072         if (ret) {
2073                 printk(KERN_CONT "FAILED!\n");
2074                 /* Add the warning after printing 'FAILED' */
2075                 WARN_ON(1);
2076                 return -1;
2077         }
2078         /* Only reset on passing, to avoid touching corrupted buffers */
2079         tracing_reset_online_cpus(&tr->array_buffer);
2080
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082         if (type->use_max_tr) {
2083                 tr->allocated_snapshot = false;
2084
2085                 /* Shrink the max buffer again */
2086                 if (tr->ring_buffer_expanded)
2087                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2088                                            RING_BUFFER_ALL_CPUS);
2089         }
2090 #endif
2091
2092         printk(KERN_CONT "PASSED\n");
2093         return 0;
2094 }
2095
2096 static int do_run_tracer_selftest(struct tracer *type)
2097 {
2098         int ret;
2099
2100         /*
2101          * Tests can take a long time, especially if they are run one after the
2102          * other, as does happen during bootup when all the tracers are
2103          * registered. This could cause the soft lockup watchdog to trigger.
2104          */
2105         cond_resched();
2106
2107         tracing_selftest_running = true;
2108         ret = run_tracer_selftest(type);
2109         tracing_selftest_running = false;
2110
2111         return ret;
2112 }
2113
2114 static __init int init_trace_selftests(void)
2115 {
2116         struct trace_selftests *p, *n;
2117         struct tracer *t, **last;
2118         int ret;
2119
2120         selftests_can_run = true;
2121
2122         mutex_lock(&trace_types_lock);
2123
2124         if (list_empty(&postponed_selftests))
2125                 goto out;
2126
2127         pr_info("Running postponed tracer tests:\n");
2128
2129         tracing_selftest_running = true;
2130         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2131                 /* This loop can take minutes when sanitizers are enabled, so
2132                  * lets make sure we allow RCU processing.
2133                  */
2134                 cond_resched();
2135                 ret = run_tracer_selftest(p->type);
2136                 /* If the test fails, then warn and remove from available_tracers */
2137                 if (ret < 0) {
2138                         WARN(1, "tracer: %s failed selftest, disabling\n",
2139                              p->type->name);
2140                         last = &trace_types;
2141                         for (t = trace_types; t; t = t->next) {
2142                                 if (t == p->type) {
2143                                         *last = t->next;
2144                                         break;
2145                                 }
2146                                 last = &t->next;
2147                         }
2148                 }
2149                 list_del(&p->list);
2150                 kfree(p);
2151         }
2152         tracing_selftest_running = false;
2153
2154  out:
2155         mutex_unlock(&trace_types_lock);
2156
2157         return 0;
2158 }
2159 core_initcall(init_trace_selftests);
2160 #else
2161 static inline int run_tracer_selftest(struct tracer *type)
2162 {
2163         return 0;
2164 }
2165 static inline int do_run_tracer_selftest(struct tracer *type)
2166 {
2167         return 0;
2168 }
2169 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2170
2171 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2172
2173 static void __init apply_trace_boot_options(void);
2174
2175 /**
2176  * register_tracer - register a tracer with the ftrace system.
2177  * @type: the plugin for the tracer
2178  *
2179  * Register a new plugin tracer.
2180  */
2181 int __init register_tracer(struct tracer *type)
2182 {
2183         struct tracer *t;
2184         int ret = 0;
2185
2186         if (!type->name) {
2187                 pr_info("Tracer must have a name\n");
2188                 return -1;
2189         }
2190
2191         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2192                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2193                 return -1;
2194         }
2195
2196         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2197                 pr_warn("Can not register tracer %s due to lockdown\n",
2198                            type->name);
2199                 return -EPERM;
2200         }
2201
2202         mutex_lock(&trace_types_lock);
2203
2204         for (t = trace_types; t; t = t->next) {
2205                 if (strcmp(type->name, t->name) == 0) {
2206                         /* already found */
2207                         pr_info("Tracer %s already registered\n",
2208                                 type->name);
2209                         ret = -1;
2210                         goto out;
2211                 }
2212         }
2213
2214         if (!type->set_flag)
2215                 type->set_flag = &dummy_set_flag;
2216         if (!type->flags) {
2217                 /*allocate a dummy tracer_flags*/
2218                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2219                 if (!type->flags) {
2220                         ret = -ENOMEM;
2221                         goto out;
2222                 }
2223                 type->flags->val = 0;
2224                 type->flags->opts = dummy_tracer_opt;
2225         } else
2226                 if (!type->flags->opts)
2227                         type->flags->opts = dummy_tracer_opt;
2228
2229         /* store the tracer for __set_tracer_option */
2230         type->flags->trace = type;
2231
2232         ret = do_run_tracer_selftest(type);
2233         if (ret < 0)
2234                 goto out;
2235
2236         type->next = trace_types;
2237         trace_types = type;
2238         add_tracer_options(&global_trace, type);
2239
2240  out:
2241         mutex_unlock(&trace_types_lock);
2242
2243         if (ret || !default_bootup_tracer)
2244                 goto out_unlock;
2245
2246         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2247                 goto out_unlock;
2248
2249         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2250         /* Do we want this tracer to start on bootup? */
2251         tracing_set_tracer(&global_trace, type->name);
2252         default_bootup_tracer = NULL;
2253
2254         apply_trace_boot_options();
2255
2256         /* disable other selftests, since this will break it. */
2257         disable_tracing_selftest("running a tracer");
2258
2259  out_unlock:
2260         return ret;
2261 }
2262
2263 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2264 {
2265         struct trace_buffer *buffer = buf->buffer;
2266
2267         if (!buffer)
2268                 return;
2269
2270         ring_buffer_record_disable(buffer);
2271
2272         /* Make sure all commits have finished */
2273         synchronize_rcu();
2274         ring_buffer_reset_cpu(buffer, cpu);
2275
2276         ring_buffer_record_enable(buffer);
2277 }
2278
2279 void tracing_reset_online_cpus(struct array_buffer *buf)
2280 {
2281         struct trace_buffer *buffer = buf->buffer;
2282
2283         if (!buffer)
2284                 return;
2285
2286         ring_buffer_record_disable(buffer);
2287
2288         /* Make sure all commits have finished */
2289         synchronize_rcu();
2290
2291         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2292
2293         ring_buffer_reset_online_cpus(buffer);
2294
2295         ring_buffer_record_enable(buffer);
2296 }
2297
2298 /* Must have trace_types_lock held */
2299 void tracing_reset_all_online_cpus_unlocked(void)
2300 {
2301         struct trace_array *tr;
2302
2303         lockdep_assert_held(&trace_types_lock);
2304
2305         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2306                 if (!tr->clear_trace)
2307                         continue;
2308                 tr->clear_trace = false;
2309                 tracing_reset_online_cpus(&tr->array_buffer);
2310 #ifdef CONFIG_TRACER_MAX_TRACE
2311                 tracing_reset_online_cpus(&tr->max_buffer);
2312 #endif
2313         }
2314 }
2315
2316 void tracing_reset_all_online_cpus(void)
2317 {
2318         mutex_lock(&trace_types_lock);
2319         tracing_reset_all_online_cpus_unlocked();
2320         mutex_unlock(&trace_types_lock);
2321 }
2322
2323 /*
2324  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2325  * is the tgid last observed corresponding to pid=i.
2326  */
2327 static int *tgid_map;
2328
2329 /* The maximum valid index into tgid_map. */
2330 static size_t tgid_map_max;
2331
2332 #define SAVED_CMDLINES_DEFAULT 128
2333 #define NO_CMDLINE_MAP UINT_MAX
2334 /*
2335  * Preemption must be disabled before acquiring trace_cmdline_lock.
2336  * The various trace_arrays' max_lock must be acquired in a context
2337  * where interrupt is disabled.
2338  */
2339 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2340 struct saved_cmdlines_buffer {
2341         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2342         unsigned *map_cmdline_to_pid;
2343         unsigned cmdline_num;
2344         int cmdline_idx;
2345         char saved_cmdlines[];
2346 };
2347 static struct saved_cmdlines_buffer *savedcmd;
2348
2349 static inline char *get_saved_cmdlines(int idx)
2350 {
2351         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2352 }
2353
2354 static inline void set_cmdline(int idx, const char *cmdline)
2355 {
2356         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2357 }
2358
2359 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2360 {
2361         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2362
2363         kfree(s->map_cmdline_to_pid);
2364         kmemleak_free(s);
2365         free_pages((unsigned long)s, order);
2366 }
2367
2368 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2369 {
2370         struct saved_cmdlines_buffer *s;
2371         struct page *page;
2372         int orig_size, size;
2373         int order;
2374
2375         /* Figure out how much is needed to hold the given number of cmdlines */
2376         orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2377         order = get_order(orig_size);
2378         size = 1 << (order + PAGE_SHIFT);
2379         page = alloc_pages(GFP_KERNEL, order);
2380         if (!page)
2381                 return NULL;
2382
2383         s = page_address(page);
2384         kmemleak_alloc(s, size, 1, GFP_KERNEL);
2385         memset(s, 0, sizeof(*s));
2386
2387         /* Round up to actual allocation */
2388         val = (size - sizeof(*s)) / TASK_COMM_LEN;
2389         s->cmdline_num = val;
2390
2391         s->map_cmdline_to_pid = kmalloc_array(val,
2392                                               sizeof(*s->map_cmdline_to_pid),
2393                                               GFP_KERNEL);
2394         if (!s->map_cmdline_to_pid) {
2395                 free_saved_cmdlines_buffer(s);
2396                 return NULL;
2397         }
2398
2399         s->cmdline_idx = 0;
2400         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2401                sizeof(s->map_pid_to_cmdline));
2402         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2403                val * sizeof(*s->map_cmdline_to_pid));
2404
2405         return s;
2406 }
2407
2408 static int trace_create_savedcmd(void)
2409 {
2410         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2411
2412         return savedcmd ? 0 : -ENOMEM;
2413 }
2414
2415 int is_tracing_stopped(void)
2416 {
2417         return global_trace.stop_count;
2418 }
2419
2420 static void tracing_start_tr(struct trace_array *tr)
2421 {
2422         struct trace_buffer *buffer;
2423         unsigned long flags;
2424
2425         if (tracing_disabled)
2426                 return;
2427
2428         raw_spin_lock_irqsave(&tr->start_lock, flags);
2429         if (--tr->stop_count) {
2430                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2431                         /* Someone screwed up their debugging */
2432                         tr->stop_count = 0;
2433                 }
2434                 goto out;
2435         }
2436
2437         /* Prevent the buffers from switching */
2438         arch_spin_lock(&tr->max_lock);
2439
2440         buffer = tr->array_buffer.buffer;
2441         if (buffer)
2442                 ring_buffer_record_enable(buffer);
2443
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445         buffer = tr->max_buffer.buffer;
2446         if (buffer)
2447                 ring_buffer_record_enable(buffer);
2448 #endif
2449
2450         arch_spin_unlock(&tr->max_lock);
2451
2452  out:
2453         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2454 }
2455
2456 /**
2457  * tracing_start - quick start of the tracer
2458  *
2459  * If tracing is enabled but was stopped by tracing_stop,
2460  * this will start the tracer back up.
2461  */
2462 void tracing_start(void)
2463
2464 {
2465         return tracing_start_tr(&global_trace);
2466 }
2467
2468 static void tracing_stop_tr(struct trace_array *tr)
2469 {
2470         struct trace_buffer *buffer;
2471         unsigned long flags;
2472
2473         raw_spin_lock_irqsave(&tr->start_lock, flags);
2474         if (tr->stop_count++)
2475                 goto out;
2476
2477         /* Prevent the buffers from switching */
2478         arch_spin_lock(&tr->max_lock);
2479
2480         buffer = tr->array_buffer.buffer;
2481         if (buffer)
2482                 ring_buffer_record_disable(buffer);
2483
2484 #ifdef CONFIG_TRACER_MAX_TRACE
2485         buffer = tr->max_buffer.buffer;
2486         if (buffer)
2487                 ring_buffer_record_disable(buffer);
2488 #endif
2489
2490         arch_spin_unlock(&tr->max_lock);
2491
2492  out:
2493         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2494 }
2495
2496 /**
2497  * tracing_stop - quick stop of the tracer
2498  *
2499  * Light weight way to stop tracing. Use in conjunction with
2500  * tracing_start.
2501  */
2502 void tracing_stop(void)
2503 {
2504         return tracing_stop_tr(&global_trace);
2505 }
2506
2507 static int trace_save_cmdline(struct task_struct *tsk)
2508 {
2509         unsigned tpid, idx;
2510
2511         /* treat recording of idle task as a success */
2512         if (!tsk->pid)
2513                 return 1;
2514
2515         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2516
2517         /*
2518          * It's not the end of the world if we don't get
2519          * the lock, but we also don't want to spin
2520          * nor do we want to disable interrupts,
2521          * so if we miss here, then better luck next time.
2522          *
2523          * This is called within the scheduler and wake up, so interrupts
2524          * had better been disabled and run queue lock been held.
2525          */
2526         lockdep_assert_preemption_disabled();
2527         if (!arch_spin_trylock(&trace_cmdline_lock))
2528                 return 0;
2529
2530         idx = savedcmd->map_pid_to_cmdline[tpid];
2531         if (idx == NO_CMDLINE_MAP) {
2532                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2533
2534                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2535                 savedcmd->cmdline_idx = idx;
2536         }
2537
2538         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2539         set_cmdline(idx, tsk->comm);
2540
2541         arch_spin_unlock(&trace_cmdline_lock);
2542
2543         return 1;
2544 }
2545
2546 static void __trace_find_cmdline(int pid, char comm[])
2547 {
2548         unsigned map;
2549         int tpid;
2550
2551         if (!pid) {
2552                 strcpy(comm, "<idle>");
2553                 return;
2554         }
2555
2556         if (WARN_ON_ONCE(pid < 0)) {
2557                 strcpy(comm, "<XXX>");
2558                 return;
2559         }
2560
2561         tpid = pid & (PID_MAX_DEFAULT - 1);
2562         map = savedcmd->map_pid_to_cmdline[tpid];
2563         if (map != NO_CMDLINE_MAP) {
2564                 tpid = savedcmd->map_cmdline_to_pid[map];
2565                 if (tpid == pid) {
2566                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2567                         return;
2568                 }
2569         }
2570         strcpy(comm, "<...>");
2571 }
2572
2573 void trace_find_cmdline(int pid, char comm[])
2574 {
2575         preempt_disable();
2576         arch_spin_lock(&trace_cmdline_lock);
2577
2578         __trace_find_cmdline(pid, comm);
2579
2580         arch_spin_unlock(&trace_cmdline_lock);
2581         preempt_enable();
2582 }
2583
2584 static int *trace_find_tgid_ptr(int pid)
2585 {
2586         /*
2587          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2588          * if we observe a non-NULL tgid_map then we also observe the correct
2589          * tgid_map_max.
2590          */
2591         int *map = smp_load_acquire(&tgid_map);
2592
2593         if (unlikely(!map || pid > tgid_map_max))
2594                 return NULL;
2595
2596         return &map[pid];
2597 }
2598
2599 int trace_find_tgid(int pid)
2600 {
2601         int *ptr = trace_find_tgid_ptr(pid);
2602
2603         return ptr ? *ptr : 0;
2604 }
2605
2606 static int trace_save_tgid(struct task_struct *tsk)
2607 {
2608         int *ptr;
2609
2610         /* treat recording of idle task as a success */
2611         if (!tsk->pid)
2612                 return 1;
2613
2614         ptr = trace_find_tgid_ptr(tsk->pid);
2615         if (!ptr)
2616                 return 0;
2617
2618         *ptr = tsk->tgid;
2619         return 1;
2620 }
2621
2622 static bool tracing_record_taskinfo_skip(int flags)
2623 {
2624         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2625                 return true;
2626         if (!__this_cpu_read(trace_taskinfo_save))
2627                 return true;
2628         return false;
2629 }
2630
2631 /**
2632  * tracing_record_taskinfo - record the task info of a task
2633  *
2634  * @task:  task to record
2635  * @flags: TRACE_RECORD_CMDLINE for recording comm
2636  *         TRACE_RECORD_TGID for recording tgid
2637  */
2638 void tracing_record_taskinfo(struct task_struct *task, int flags)
2639 {
2640         bool done;
2641
2642         if (tracing_record_taskinfo_skip(flags))
2643                 return;
2644
2645         /*
2646          * Record as much task information as possible. If some fail, continue
2647          * to try to record the others.
2648          */
2649         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2650         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2651
2652         /* If recording any information failed, retry again soon. */
2653         if (!done)
2654                 return;
2655
2656         __this_cpu_write(trace_taskinfo_save, false);
2657 }
2658
2659 /**
2660  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2661  *
2662  * @prev: previous task during sched_switch
2663  * @next: next task during sched_switch
2664  * @flags: TRACE_RECORD_CMDLINE for recording comm
2665  *         TRACE_RECORD_TGID for recording tgid
2666  */
2667 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2668                                           struct task_struct *next, int flags)
2669 {
2670         bool done;
2671
2672         if (tracing_record_taskinfo_skip(flags))
2673                 return;
2674
2675         /*
2676          * Record as much task information as possible. If some fail, continue
2677          * to try to record the others.
2678          */
2679         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2680         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2681         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2682         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2683
2684         /* If recording any information failed, retry again soon. */
2685         if (!done)
2686                 return;
2687
2688         __this_cpu_write(trace_taskinfo_save, false);
2689 }
2690
2691 /* Helpers to record a specific task information */
2692 void tracing_record_cmdline(struct task_struct *task)
2693 {
2694         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2695 }
2696
2697 void tracing_record_tgid(struct task_struct *task)
2698 {
2699         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2700 }
2701
2702 /*
2703  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2704  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2705  * simplifies those functions and keeps them in sync.
2706  */
2707 enum print_line_t trace_handle_return(struct trace_seq *s)
2708 {
2709         return trace_seq_has_overflowed(s) ?
2710                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2711 }
2712 EXPORT_SYMBOL_GPL(trace_handle_return);
2713
2714 static unsigned short migration_disable_value(void)
2715 {
2716 #if defined(CONFIG_SMP)
2717         return current->migration_disabled;
2718 #else
2719         return 0;
2720 #endif
2721 }
2722
2723 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2724 {
2725         unsigned int trace_flags = irqs_status;
2726         unsigned int pc;
2727
2728         pc = preempt_count();
2729
2730         if (pc & NMI_MASK)
2731                 trace_flags |= TRACE_FLAG_NMI;
2732         if (pc & HARDIRQ_MASK)
2733                 trace_flags |= TRACE_FLAG_HARDIRQ;
2734         if (in_serving_softirq())
2735                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2736         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2737                 trace_flags |= TRACE_FLAG_BH_OFF;
2738
2739         if (tif_need_resched())
2740                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2741         if (test_preempt_need_resched())
2742                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2743         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2744                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2745 }
2746
2747 struct ring_buffer_event *
2748 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2749                           int type,
2750                           unsigned long len,
2751                           unsigned int trace_ctx)
2752 {
2753         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2754 }
2755
2756 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2757 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2758 static int trace_buffered_event_ref;
2759
2760 /**
2761  * trace_buffered_event_enable - enable buffering events
2762  *
2763  * When events are being filtered, it is quicker to use a temporary
2764  * buffer to write the event data into if there's a likely chance
2765  * that it will not be committed. The discard of the ring buffer
2766  * is not as fast as committing, and is much slower than copying
2767  * a commit.
2768  *
2769  * When an event is to be filtered, allocate per cpu buffers to
2770  * write the event data into, and if the event is filtered and discarded
2771  * it is simply dropped, otherwise, the entire data is to be committed
2772  * in one shot.
2773  */
2774 void trace_buffered_event_enable(void)
2775 {
2776         struct ring_buffer_event *event;
2777         struct page *page;
2778         int cpu;
2779
2780         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2781
2782         if (trace_buffered_event_ref++)
2783                 return;
2784
2785         for_each_tracing_cpu(cpu) {
2786                 page = alloc_pages_node(cpu_to_node(cpu),
2787                                         GFP_KERNEL | __GFP_NORETRY, 0);
2788                 /* This is just an optimization and can handle failures */
2789                 if (!page) {
2790                         pr_err("Failed to allocate event buffer\n");
2791                         break;
2792                 }
2793
2794                 event = page_address(page);
2795                 memset(event, 0, sizeof(*event));
2796
2797                 per_cpu(trace_buffered_event, cpu) = event;
2798
2799                 preempt_disable();
2800                 if (cpu == smp_processor_id() &&
2801                     __this_cpu_read(trace_buffered_event) !=
2802                     per_cpu(trace_buffered_event, cpu))
2803                         WARN_ON_ONCE(1);
2804                 preempt_enable();
2805         }
2806 }
2807
2808 static void enable_trace_buffered_event(void *data)
2809 {
2810         /* Probably not needed, but do it anyway */
2811         smp_rmb();
2812         this_cpu_dec(trace_buffered_event_cnt);
2813 }
2814
2815 static void disable_trace_buffered_event(void *data)
2816 {
2817         this_cpu_inc(trace_buffered_event_cnt);
2818 }
2819
2820 /**
2821  * trace_buffered_event_disable - disable buffering events
2822  *
2823  * When a filter is removed, it is faster to not use the buffered
2824  * events, and to commit directly into the ring buffer. Free up
2825  * the temp buffers when there are no more users. This requires
2826  * special synchronization with current events.
2827  */
2828 void trace_buffered_event_disable(void)
2829 {
2830         int cpu;
2831
2832         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2833
2834         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2835                 return;
2836
2837         if (--trace_buffered_event_ref)
2838                 return;
2839
2840         /* For each CPU, set the buffer as used. */
2841         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2842                          NULL, true);
2843
2844         /* Wait for all current users to finish */
2845         synchronize_rcu();
2846
2847         for_each_tracing_cpu(cpu) {
2848                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2849                 per_cpu(trace_buffered_event, cpu) = NULL;
2850         }
2851
2852         /*
2853          * Wait for all CPUs that potentially started checking if they can use
2854          * their event buffer only after the previous synchronize_rcu() call and
2855          * they still read a valid pointer from trace_buffered_event. It must be
2856          * ensured they don't see cleared trace_buffered_event_cnt else they
2857          * could wrongly decide to use the pointed-to buffer which is now freed.
2858          */
2859         synchronize_rcu();
2860
2861         /* For each CPU, relinquish the buffer */
2862         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2863                          true);
2864 }
2865
2866 static struct trace_buffer *temp_buffer;
2867
2868 struct ring_buffer_event *
2869 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2870                           struct trace_event_file *trace_file,
2871                           int type, unsigned long len,
2872                           unsigned int trace_ctx)
2873 {
2874         struct ring_buffer_event *entry;
2875         struct trace_array *tr = trace_file->tr;
2876         int val;
2877
2878         *current_rb = tr->array_buffer.buffer;
2879
2880         if (!tr->no_filter_buffering_ref &&
2881             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2882                 preempt_disable_notrace();
2883                 /*
2884                  * Filtering is on, so try to use the per cpu buffer first.
2885                  * This buffer will simulate a ring_buffer_event,
2886                  * where the type_len is zero and the array[0] will
2887                  * hold the full length.
2888                  * (see include/linux/ring-buffer.h for details on
2889                  *  how the ring_buffer_event is structured).
2890                  *
2891                  * Using a temp buffer during filtering and copying it
2892                  * on a matched filter is quicker than writing directly
2893                  * into the ring buffer and then discarding it when
2894                  * it doesn't match. That is because the discard
2895                  * requires several atomic operations to get right.
2896                  * Copying on match and doing nothing on a failed match
2897                  * is still quicker than no copy on match, but having
2898                  * to discard out of the ring buffer on a failed match.
2899                  */
2900                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2901                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2902
2903                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2904
2905                         /*
2906                          * Preemption is disabled, but interrupts and NMIs
2907                          * can still come in now. If that happens after
2908                          * the above increment, then it will have to go
2909                          * back to the old method of allocating the event
2910                          * on the ring buffer, and if the filter fails, it
2911                          * will have to call ring_buffer_discard_commit()
2912                          * to remove it.
2913                          *
2914                          * Need to also check the unlikely case that the
2915                          * length is bigger than the temp buffer size.
2916                          * If that happens, then the reserve is pretty much
2917                          * guaranteed to fail, as the ring buffer currently
2918                          * only allows events less than a page. But that may
2919                          * change in the future, so let the ring buffer reserve
2920                          * handle the failure in that case.
2921                          */
2922                         if (val == 1 && likely(len <= max_len)) {
2923                                 trace_event_setup(entry, type, trace_ctx);
2924                                 entry->array[0] = len;
2925                                 /* Return with preemption disabled */
2926                                 return entry;
2927                         }
2928                         this_cpu_dec(trace_buffered_event_cnt);
2929                 }
2930                 /* __trace_buffer_lock_reserve() disables preemption */
2931                 preempt_enable_notrace();
2932         }
2933
2934         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2935                                             trace_ctx);
2936         /*
2937          * If tracing is off, but we have triggers enabled
2938          * we still need to look at the event data. Use the temp_buffer
2939          * to store the trace event for the trigger to use. It's recursive
2940          * safe and will not be recorded anywhere.
2941          */
2942         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2943                 *current_rb = temp_buffer;
2944                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2945                                                     trace_ctx);
2946         }
2947         return entry;
2948 }
2949 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2950
2951 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2952 static DEFINE_MUTEX(tracepoint_printk_mutex);
2953
2954 static void output_printk(struct trace_event_buffer *fbuffer)
2955 {
2956         struct trace_event_call *event_call;
2957         struct trace_event_file *file;
2958         struct trace_event *event;
2959         unsigned long flags;
2960         struct trace_iterator *iter = tracepoint_print_iter;
2961
2962         /* We should never get here if iter is NULL */
2963         if (WARN_ON_ONCE(!iter))
2964                 return;
2965
2966         event_call = fbuffer->trace_file->event_call;
2967         if (!event_call || !event_call->event.funcs ||
2968             !event_call->event.funcs->trace)
2969                 return;
2970
2971         file = fbuffer->trace_file;
2972         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2973             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2974              !filter_match_preds(file->filter, fbuffer->entry)))
2975                 return;
2976
2977         event = &fbuffer->trace_file->event_call->event;
2978
2979         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2980         trace_seq_init(&iter->seq);
2981         iter->ent = fbuffer->entry;
2982         event_call->event.funcs->trace(iter, 0, event);
2983         trace_seq_putc(&iter->seq, 0);
2984         printk("%s", iter->seq.buffer);
2985
2986         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2987 }
2988
2989 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2990                              void *buffer, size_t *lenp,
2991                              loff_t *ppos)
2992 {
2993         int save_tracepoint_printk;
2994         int ret;
2995
2996         mutex_lock(&tracepoint_printk_mutex);
2997         save_tracepoint_printk = tracepoint_printk;
2998
2999         ret = proc_dointvec(table, write, buffer, lenp, ppos);
3000
3001         /*
3002          * This will force exiting early, as tracepoint_printk
3003          * is always zero when tracepoint_printk_iter is not allocated
3004          */
3005         if (!tracepoint_print_iter)
3006                 tracepoint_printk = 0;
3007
3008         if (save_tracepoint_printk == tracepoint_printk)
3009                 goto out;
3010
3011         if (tracepoint_printk)
3012                 static_key_enable(&tracepoint_printk_key.key);
3013         else
3014                 static_key_disable(&tracepoint_printk_key.key);
3015
3016  out:
3017         mutex_unlock(&tracepoint_printk_mutex);
3018
3019         return ret;
3020 }
3021
3022 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3023 {
3024         enum event_trigger_type tt = ETT_NONE;
3025         struct trace_event_file *file = fbuffer->trace_file;
3026
3027         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3028                         fbuffer->entry, &tt))
3029                 goto discard;
3030
3031         if (static_key_false(&tracepoint_printk_key.key))
3032                 output_printk(fbuffer);
3033
3034         if (static_branch_unlikely(&trace_event_exports_enabled))
3035                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3036
3037         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3038                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3039
3040 discard:
3041         if (tt)
3042                 event_triggers_post_call(file, tt);
3043
3044 }
3045 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3046
3047 /*
3048  * Skip 3:
3049  *
3050  *   trace_buffer_unlock_commit_regs()
3051  *   trace_event_buffer_commit()
3052  *   trace_event_raw_event_xxx()
3053  */
3054 # define STACK_SKIP 3
3055
3056 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3057                                      struct trace_buffer *buffer,
3058                                      struct ring_buffer_event *event,
3059                                      unsigned int trace_ctx,
3060                                      struct pt_regs *regs)
3061 {
3062         __buffer_unlock_commit(buffer, event);
3063
3064         /*
3065          * If regs is not set, then skip the necessary functions.
3066          * Note, we can still get here via blktrace, wakeup tracer
3067          * and mmiotrace, but that's ok if they lose a function or
3068          * two. They are not that meaningful.
3069          */
3070         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3071         ftrace_trace_userstack(tr, buffer, trace_ctx);
3072 }
3073
3074 /*
3075  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3076  */
3077 void
3078 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3079                                    struct ring_buffer_event *event)
3080 {
3081         __buffer_unlock_commit(buffer, event);
3082 }
3083
3084 void
3085 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3086                parent_ip, unsigned int trace_ctx)
3087 {
3088         struct trace_event_call *call = &event_function;
3089         struct trace_buffer *buffer = tr->array_buffer.buffer;
3090         struct ring_buffer_event *event;
3091         struct ftrace_entry *entry;
3092
3093         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3094                                             trace_ctx);
3095         if (!event)
3096                 return;
3097         entry   = ring_buffer_event_data(event);
3098         entry->ip                       = ip;
3099         entry->parent_ip                = parent_ip;
3100
3101         if (!call_filter_check_discard(call, entry, buffer, event)) {
3102                 if (static_branch_unlikely(&trace_function_exports_enabled))
3103                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3104                 __buffer_unlock_commit(buffer, event);
3105         }
3106 }
3107
3108 #ifdef CONFIG_STACKTRACE
3109
3110 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3111 #define FTRACE_KSTACK_NESTING   4
3112
3113 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3114
3115 struct ftrace_stack {
3116         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3117 };
3118
3119
3120 struct ftrace_stacks {
3121         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3122 };
3123
3124 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3125 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3126
3127 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3128                                  unsigned int trace_ctx,
3129                                  int skip, struct pt_regs *regs)
3130 {
3131         struct trace_event_call *call = &event_kernel_stack;
3132         struct ring_buffer_event *event;
3133         unsigned int size, nr_entries;
3134         struct ftrace_stack *fstack;
3135         struct stack_entry *entry;
3136         int stackidx;
3137
3138         /*
3139          * Add one, for this function and the call to save_stack_trace()
3140          * If regs is set, then these functions will not be in the way.
3141          */
3142 #ifndef CONFIG_UNWINDER_ORC
3143         if (!regs)
3144                 skip++;
3145 #endif
3146
3147         preempt_disable_notrace();
3148
3149         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3150
3151         /* This should never happen. If it does, yell once and skip */
3152         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3153                 goto out;
3154
3155         /*
3156          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3157          * interrupt will either see the value pre increment or post
3158          * increment. If the interrupt happens pre increment it will have
3159          * restored the counter when it returns.  We just need a barrier to
3160          * keep gcc from moving things around.
3161          */
3162         barrier();
3163
3164         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3165         size = ARRAY_SIZE(fstack->calls);
3166
3167         if (regs) {
3168                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3169                                                    size, skip);
3170         } else {
3171                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3172         }
3173
3174         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3175                                     struct_size(entry, caller, nr_entries),
3176                                     trace_ctx);
3177         if (!event)
3178                 goto out;
3179         entry = ring_buffer_event_data(event);
3180
3181         entry->size = nr_entries;
3182         memcpy(&entry->caller, fstack->calls,
3183                flex_array_size(entry, caller, nr_entries));
3184
3185         if (!call_filter_check_discard(call, entry, buffer, event))
3186                 __buffer_unlock_commit(buffer, event);
3187
3188  out:
3189         /* Again, don't let gcc optimize things here */
3190         barrier();
3191         __this_cpu_dec(ftrace_stack_reserve);
3192         preempt_enable_notrace();
3193
3194 }
3195
3196 static inline void ftrace_trace_stack(struct trace_array *tr,
3197                                       struct trace_buffer *buffer,
3198                                       unsigned int trace_ctx,
3199                                       int skip, struct pt_regs *regs)
3200 {
3201         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3202                 return;
3203
3204         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3205 }
3206
3207 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3208                    int skip)
3209 {
3210         struct trace_buffer *buffer = tr->array_buffer.buffer;
3211
3212         if (rcu_is_watching()) {
3213                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3214                 return;
3215         }
3216
3217         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3218                 return;
3219
3220         /*
3221          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3222          * but if the above rcu_is_watching() failed, then the NMI
3223          * triggered someplace critical, and ct_irq_enter() should
3224          * not be called from NMI.
3225          */
3226         if (unlikely(in_nmi()))
3227                 return;
3228
3229         ct_irq_enter_irqson();
3230         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3231         ct_irq_exit_irqson();
3232 }
3233
3234 /**
3235  * trace_dump_stack - record a stack back trace in the trace buffer
3236  * @skip: Number of functions to skip (helper handlers)
3237  */
3238 void trace_dump_stack(int skip)
3239 {
3240         if (tracing_disabled || tracing_selftest_running)
3241                 return;
3242
3243 #ifndef CONFIG_UNWINDER_ORC
3244         /* Skip 1 to skip this function. */
3245         skip++;
3246 #endif
3247         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3248                              tracing_gen_ctx(), skip, NULL);
3249 }
3250 EXPORT_SYMBOL_GPL(trace_dump_stack);
3251
3252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3253 static DEFINE_PER_CPU(int, user_stack_count);
3254
3255 static void
3256 ftrace_trace_userstack(struct trace_array *tr,
3257                        struct trace_buffer *buffer, unsigned int trace_ctx)
3258 {
3259         struct trace_event_call *call = &event_user_stack;
3260         struct ring_buffer_event *event;
3261         struct userstack_entry *entry;
3262
3263         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3264                 return;
3265
3266         /*
3267          * NMIs can not handle page faults, even with fix ups.
3268          * The save user stack can (and often does) fault.
3269          */
3270         if (unlikely(in_nmi()))
3271                 return;
3272
3273         /*
3274          * prevent recursion, since the user stack tracing may
3275          * trigger other kernel events.
3276          */
3277         preempt_disable();
3278         if (__this_cpu_read(user_stack_count))
3279                 goto out;
3280
3281         __this_cpu_inc(user_stack_count);
3282
3283         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3284                                             sizeof(*entry), trace_ctx);
3285         if (!event)
3286                 goto out_drop_count;
3287         entry   = ring_buffer_event_data(event);
3288
3289         entry->tgid             = current->tgid;
3290         memset(&entry->caller, 0, sizeof(entry->caller));
3291
3292         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3293         if (!call_filter_check_discard(call, entry, buffer, event))
3294                 __buffer_unlock_commit(buffer, event);
3295
3296  out_drop_count:
3297         __this_cpu_dec(user_stack_count);
3298  out:
3299         preempt_enable();
3300 }
3301 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3302 static void ftrace_trace_userstack(struct trace_array *tr,
3303                                    struct trace_buffer *buffer,
3304                                    unsigned int trace_ctx)
3305 {
3306 }
3307 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3308
3309 #endif /* CONFIG_STACKTRACE */
3310
3311 static inline void
3312 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3313                           unsigned long long delta)
3314 {
3315         entry->bottom_delta_ts = delta & U32_MAX;
3316         entry->top_delta_ts = (delta >> 32);
3317 }
3318
3319 void trace_last_func_repeats(struct trace_array *tr,
3320                              struct trace_func_repeats *last_info,
3321                              unsigned int trace_ctx)
3322 {
3323         struct trace_buffer *buffer = tr->array_buffer.buffer;
3324         struct func_repeats_entry *entry;
3325         struct ring_buffer_event *event;
3326         u64 delta;
3327
3328         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3329                                             sizeof(*entry), trace_ctx);
3330         if (!event)
3331                 return;
3332
3333         delta = ring_buffer_event_time_stamp(buffer, event) -
3334                 last_info->ts_last_call;
3335
3336         entry = ring_buffer_event_data(event);
3337         entry->ip = last_info->ip;
3338         entry->parent_ip = last_info->parent_ip;
3339         entry->count = last_info->count;
3340         func_repeats_set_delta_ts(entry, delta);
3341
3342         __buffer_unlock_commit(buffer, event);
3343 }
3344
3345 /* created for use with alloc_percpu */
3346 struct trace_buffer_struct {
3347         int nesting;
3348         char buffer[4][TRACE_BUF_SIZE];
3349 };
3350
3351 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3352
3353 /*
3354  * This allows for lockless recording.  If we're nested too deeply, then
3355  * this returns NULL.
3356  */
3357 static char *get_trace_buf(void)
3358 {
3359         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3360
3361         if (!trace_percpu_buffer || buffer->nesting >= 4)
3362                 return NULL;
3363
3364         buffer->nesting++;
3365
3366         /* Interrupts must see nesting incremented before we use the buffer */
3367         barrier();
3368         return &buffer->buffer[buffer->nesting - 1][0];
3369 }
3370
3371 static void put_trace_buf(void)
3372 {
3373         /* Don't let the decrement of nesting leak before this */
3374         barrier();
3375         this_cpu_dec(trace_percpu_buffer->nesting);
3376 }
3377
3378 static int alloc_percpu_trace_buffer(void)
3379 {
3380         struct trace_buffer_struct __percpu *buffers;
3381
3382         if (trace_percpu_buffer)
3383                 return 0;
3384
3385         buffers = alloc_percpu(struct trace_buffer_struct);
3386         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3387                 return -ENOMEM;
3388
3389         trace_percpu_buffer = buffers;
3390         return 0;
3391 }
3392
3393 static int buffers_allocated;
3394
3395 void trace_printk_init_buffers(void)
3396 {
3397         if (buffers_allocated)
3398                 return;
3399
3400         if (alloc_percpu_trace_buffer())
3401                 return;
3402
3403         /* trace_printk() is for debug use only. Don't use it in production. */
3404
3405         pr_warn("\n");
3406         pr_warn("**********************************************************\n");
3407         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3408         pr_warn("**                                                      **\n");
3409         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3410         pr_warn("**                                                      **\n");
3411         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3412         pr_warn("** unsafe for production use.                           **\n");
3413         pr_warn("**                                                      **\n");
3414         pr_warn("** If you see this message and you are not debugging    **\n");
3415         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3416         pr_warn("**                                                      **\n");
3417         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3418         pr_warn("**********************************************************\n");
3419
3420         /* Expand the buffers to set size */
3421         tracing_update_buffers(&global_trace);
3422
3423         buffers_allocated = 1;
3424
3425         /*
3426          * trace_printk_init_buffers() can be called by modules.
3427          * If that happens, then we need to start cmdline recording
3428          * directly here. If the global_trace.buffer is already
3429          * allocated here, then this was called by module code.
3430          */
3431         if (global_trace.array_buffer.buffer)
3432                 tracing_start_cmdline_record();
3433 }
3434 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3435
3436 void trace_printk_start_comm(void)
3437 {
3438         /* Start tracing comms if trace printk is set */
3439         if (!buffers_allocated)
3440                 return;
3441         tracing_start_cmdline_record();
3442 }
3443
3444 static void trace_printk_start_stop_comm(int enabled)
3445 {
3446         if (!buffers_allocated)
3447                 return;
3448
3449         if (enabled)
3450                 tracing_start_cmdline_record();
3451         else
3452                 tracing_stop_cmdline_record();
3453 }
3454
3455 /**
3456  * trace_vbprintk - write binary msg to tracing buffer
3457  * @ip:    The address of the caller
3458  * @fmt:   The string format to write to the buffer
3459  * @args:  Arguments for @fmt
3460  */
3461 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3462 {
3463         struct trace_event_call *call = &event_bprint;
3464         struct ring_buffer_event *event;
3465         struct trace_buffer *buffer;
3466         struct trace_array *tr = &global_trace;
3467         struct bprint_entry *entry;
3468         unsigned int trace_ctx;
3469         char *tbuffer;
3470         int len = 0, size;
3471
3472         if (unlikely(tracing_selftest_running || tracing_disabled))
3473                 return 0;
3474
3475         /* Don't pollute graph traces with trace_vprintk internals */
3476         pause_graph_tracing();
3477
3478         trace_ctx = tracing_gen_ctx();
3479         preempt_disable_notrace();
3480
3481         tbuffer = get_trace_buf();
3482         if (!tbuffer) {
3483                 len = 0;
3484                 goto out_nobuffer;
3485         }
3486
3487         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3488
3489         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3490                 goto out_put;
3491
3492         size = sizeof(*entry) + sizeof(u32) * len;
3493         buffer = tr->array_buffer.buffer;
3494         ring_buffer_nest_start(buffer);
3495         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3496                                             trace_ctx);
3497         if (!event)
3498                 goto out;
3499         entry = ring_buffer_event_data(event);
3500         entry->ip                       = ip;
3501         entry->fmt                      = fmt;
3502
3503         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3504         if (!call_filter_check_discard(call, entry, buffer, event)) {
3505                 __buffer_unlock_commit(buffer, event);
3506                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3507         }
3508
3509 out:
3510         ring_buffer_nest_end(buffer);
3511 out_put:
3512         put_trace_buf();
3513
3514 out_nobuffer:
3515         preempt_enable_notrace();
3516         unpause_graph_tracing();
3517
3518         return len;
3519 }
3520 EXPORT_SYMBOL_GPL(trace_vbprintk);
3521
3522 __printf(3, 0)
3523 static int
3524 __trace_array_vprintk(struct trace_buffer *buffer,
3525                       unsigned long ip, const char *fmt, va_list args)
3526 {
3527         struct trace_event_call *call = &event_print;
3528         struct ring_buffer_event *event;
3529         int len = 0, size;
3530         struct print_entry *entry;
3531         unsigned int trace_ctx;
3532         char *tbuffer;
3533
3534         if (tracing_disabled)
3535                 return 0;
3536
3537         /* Don't pollute graph traces with trace_vprintk internals */
3538         pause_graph_tracing();
3539
3540         trace_ctx = tracing_gen_ctx();
3541         preempt_disable_notrace();
3542
3543
3544         tbuffer = get_trace_buf();
3545         if (!tbuffer) {
3546                 len = 0;
3547                 goto out_nobuffer;
3548         }
3549
3550         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3551
3552         size = sizeof(*entry) + len + 1;
3553         ring_buffer_nest_start(buffer);
3554         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3555                                             trace_ctx);
3556         if (!event)
3557                 goto out;
3558         entry = ring_buffer_event_data(event);
3559         entry->ip = ip;
3560
3561         memcpy(&entry->buf, tbuffer, len + 1);
3562         if (!call_filter_check_discard(call, entry, buffer, event)) {
3563                 __buffer_unlock_commit(buffer, event);
3564                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3565         }
3566
3567 out:
3568         ring_buffer_nest_end(buffer);
3569         put_trace_buf();
3570
3571 out_nobuffer:
3572         preempt_enable_notrace();
3573         unpause_graph_tracing();
3574
3575         return len;
3576 }
3577
3578 __printf(3, 0)
3579 int trace_array_vprintk(struct trace_array *tr,
3580                         unsigned long ip, const char *fmt, va_list args)
3581 {
3582         if (tracing_selftest_running && tr == &global_trace)
3583                 return 0;
3584
3585         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3586 }
3587
3588 /**
3589  * trace_array_printk - Print a message to a specific instance
3590  * @tr: The instance trace_array descriptor
3591  * @ip: The instruction pointer that this is called from.
3592  * @fmt: The format to print (printf format)
3593  *
3594  * If a subsystem sets up its own instance, they have the right to
3595  * printk strings into their tracing instance buffer using this
3596  * function. Note, this function will not write into the top level
3597  * buffer (use trace_printk() for that), as writing into the top level
3598  * buffer should only have events that can be individually disabled.
3599  * trace_printk() is only used for debugging a kernel, and should not
3600  * be ever incorporated in normal use.
3601  *
3602  * trace_array_printk() can be used, as it will not add noise to the
3603  * top level tracing buffer.
3604  *
3605  * Note, trace_array_init_printk() must be called on @tr before this
3606  * can be used.
3607  */
3608 __printf(3, 0)
3609 int trace_array_printk(struct trace_array *tr,
3610                        unsigned long ip, const char *fmt, ...)
3611 {
3612         int ret;
3613         va_list ap;
3614
3615         if (!tr)
3616                 return -ENOENT;
3617
3618         /* This is only allowed for created instances */
3619         if (tr == &global_trace)
3620                 return 0;
3621
3622         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3623                 return 0;
3624
3625         va_start(ap, fmt);
3626         ret = trace_array_vprintk(tr, ip, fmt, ap);
3627         va_end(ap);
3628         return ret;
3629 }
3630 EXPORT_SYMBOL_GPL(trace_array_printk);
3631
3632 /**
3633  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3634  * @tr: The trace array to initialize the buffers for
3635  *
3636  * As trace_array_printk() only writes into instances, they are OK to
3637  * have in the kernel (unlike trace_printk()). This needs to be called
3638  * before trace_array_printk() can be used on a trace_array.
3639  */
3640 int trace_array_init_printk(struct trace_array *tr)
3641 {
3642         if (!tr)
3643                 return -ENOENT;
3644
3645         /* This is only allowed for created instances */
3646         if (tr == &global_trace)
3647                 return -EINVAL;
3648
3649         return alloc_percpu_trace_buffer();
3650 }
3651 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3652
3653 __printf(3, 4)
3654 int trace_array_printk_buf(struct trace_buffer *buffer,
3655                            unsigned long ip, const char *fmt, ...)
3656 {
3657         int ret;
3658         va_list ap;
3659
3660         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3661                 return 0;
3662
3663         va_start(ap, fmt);
3664         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3665         va_end(ap);
3666         return ret;
3667 }
3668
3669 __printf(2, 0)
3670 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3671 {
3672         return trace_array_vprintk(&global_trace, ip, fmt, args);
3673 }
3674 EXPORT_SYMBOL_GPL(trace_vprintk);
3675
3676 static void trace_iterator_increment(struct trace_iterator *iter)
3677 {
3678         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3679
3680         iter->idx++;
3681         if (buf_iter)
3682                 ring_buffer_iter_advance(buf_iter);
3683 }
3684
3685 static struct trace_entry *
3686 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3687                 unsigned long *lost_events)
3688 {
3689         struct ring_buffer_event *event;
3690         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3691
3692         if (buf_iter) {
3693                 event = ring_buffer_iter_peek(buf_iter, ts);
3694                 if (lost_events)
3695                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3696                                 (unsigned long)-1 : 0;
3697         } else {
3698                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3699                                          lost_events);
3700         }
3701
3702         if (event) {
3703                 iter->ent_size = ring_buffer_event_length(event);
3704                 return ring_buffer_event_data(event);
3705         }
3706         iter->ent_size = 0;
3707         return NULL;
3708 }
3709
3710 static struct trace_entry *
3711 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3712                   unsigned long *missing_events, u64 *ent_ts)
3713 {
3714         struct trace_buffer *buffer = iter->array_buffer->buffer;
3715         struct trace_entry *ent, *next = NULL;
3716         unsigned long lost_events = 0, next_lost = 0;
3717         int cpu_file = iter->cpu_file;
3718         u64 next_ts = 0, ts;
3719         int next_cpu = -1;
3720         int next_size = 0;
3721         int cpu;
3722
3723         /*
3724          * If we are in a per_cpu trace file, don't bother by iterating over
3725          * all cpu and peek directly.
3726          */
3727         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3728                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3729                         return NULL;
3730                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3731                 if (ent_cpu)
3732                         *ent_cpu = cpu_file;
3733
3734                 return ent;
3735         }
3736
3737         for_each_tracing_cpu(cpu) {
3738
3739                 if (ring_buffer_empty_cpu(buffer, cpu))
3740                         continue;
3741
3742                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3743
3744                 /*
3745                  * Pick the entry with the smallest timestamp:
3746                  */
3747                 if (ent && (!next || ts < next_ts)) {
3748                         next = ent;
3749                         next_cpu = cpu;
3750                         next_ts = ts;
3751                         next_lost = lost_events;
3752                         next_size = iter->ent_size;
3753                 }
3754         }
3755
3756         iter->ent_size = next_size;
3757
3758         if (ent_cpu)
3759                 *ent_cpu = next_cpu;
3760
3761         if (ent_ts)
3762                 *ent_ts = next_ts;
3763
3764         if (missing_events)
3765                 *missing_events = next_lost;
3766
3767         return next;
3768 }
3769
3770 #define STATIC_FMT_BUF_SIZE     128
3771 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3772
3773 char *trace_iter_expand_format(struct trace_iterator *iter)
3774 {
3775         char *tmp;
3776
3777         /*
3778          * iter->tr is NULL when used with tp_printk, which makes
3779          * this get called where it is not safe to call krealloc().
3780          */
3781         if (!iter->tr || iter->fmt == static_fmt_buf)
3782                 return NULL;
3783
3784         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3785                        GFP_KERNEL);
3786         if (tmp) {
3787                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3788                 iter->fmt = tmp;
3789         }
3790
3791         return tmp;
3792 }
3793
3794 /* Returns true if the string is safe to dereference from an event */
3795 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3796                            bool star, int len)
3797 {
3798         unsigned long addr = (unsigned long)str;
3799         struct trace_event *trace_event;
3800         struct trace_event_call *event;
3801
3802         /* Ignore strings with no length */
3803         if (star && !len)
3804                 return true;
3805
3806         /* OK if part of the event data */
3807         if ((addr >= (unsigned long)iter->ent) &&
3808             (addr < (unsigned long)iter->ent + iter->ent_size))
3809                 return true;
3810
3811         /* OK if part of the temp seq buffer */
3812         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3813             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3814                 return true;
3815
3816         /* Core rodata can not be freed */
3817         if (is_kernel_rodata(addr))
3818                 return true;
3819
3820         if (trace_is_tracepoint_string(str))
3821                 return true;
3822
3823         /*
3824          * Now this could be a module event, referencing core module
3825          * data, which is OK.
3826          */
3827         if (!iter->ent)
3828                 return false;
3829
3830         trace_event = ftrace_find_event(iter->ent->type);
3831         if (!trace_event)
3832                 return false;
3833
3834         event = container_of(trace_event, struct trace_event_call, event);
3835         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3836                 return false;
3837
3838         /* Would rather have rodata, but this will suffice */
3839         if (within_module_core(addr, event->module))
3840                 return true;
3841
3842         return false;
3843 }
3844
3845 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3846
3847 static int test_can_verify_check(const char *fmt, ...)
3848 {
3849         char buf[16];
3850         va_list ap;
3851         int ret;
3852
3853         /*
3854          * The verifier is dependent on vsnprintf() modifies the va_list
3855          * passed to it, where it is sent as a reference. Some architectures
3856          * (like x86_32) passes it by value, which means that vsnprintf()
3857          * does not modify the va_list passed to it, and the verifier
3858          * would then need to be able to understand all the values that
3859          * vsnprintf can use. If it is passed by value, then the verifier
3860          * is disabled.
3861          */
3862         va_start(ap, fmt);
3863         vsnprintf(buf, 16, "%d", ap);
3864         ret = va_arg(ap, int);
3865         va_end(ap);
3866
3867         return ret;
3868 }
3869
3870 static void test_can_verify(void)
3871 {
3872         if (!test_can_verify_check("%d %d", 0, 1)) {
3873                 pr_info("trace event string verifier disabled\n");
3874                 static_branch_inc(&trace_no_verify);
3875         }
3876 }
3877
3878 /**
3879  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3880  * @iter: The iterator that holds the seq buffer and the event being printed
3881  * @fmt: The format used to print the event
3882  * @ap: The va_list holding the data to print from @fmt.
3883  *
3884  * This writes the data into the @iter->seq buffer using the data from
3885  * @fmt and @ap. If the format has a %s, then the source of the string
3886  * is examined to make sure it is safe to print, otherwise it will
3887  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3888  * pointer.
3889  */
3890 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3891                          va_list ap)
3892 {
3893         const char *p = fmt;
3894         const char *str;
3895         int i, j;
3896
3897         if (WARN_ON_ONCE(!fmt))
3898                 return;
3899
3900         if (static_branch_unlikely(&trace_no_verify))
3901                 goto print;
3902
3903         /* Don't bother checking when doing a ftrace_dump() */
3904         if (iter->fmt == static_fmt_buf)
3905                 goto print;
3906
3907         while (*p) {
3908                 bool star = false;
3909                 int len = 0;
3910
3911                 j = 0;
3912
3913                 /* We only care about %s and variants */
3914                 for (i = 0; p[i]; i++) {
3915                         if (i + 1 >= iter->fmt_size) {
3916                                 /*
3917                                  * If we can't expand the copy buffer,
3918                                  * just print it.
3919                                  */
3920                                 if (!trace_iter_expand_format(iter))
3921                                         goto print;
3922                         }
3923
3924                         if (p[i] == '\\' && p[i+1]) {
3925                                 i++;
3926                                 continue;
3927                         }
3928                         if (p[i] == '%') {
3929                                 /* Need to test cases like %08.*s */
3930                                 for (j = 1; p[i+j]; j++) {
3931                                         if (isdigit(p[i+j]) ||
3932                                             p[i+j] == '.')
3933                                                 continue;
3934                                         if (p[i+j] == '*') {
3935                                                 star = true;
3936                                                 continue;
3937                                         }
3938                                         break;
3939                                 }
3940                                 if (p[i+j] == 's')
3941                                         break;
3942                                 star = false;
3943                         }
3944                         j = 0;
3945                 }
3946                 /* If no %s found then just print normally */
3947                 if (!p[i])
3948                         break;
3949
3950                 /* Copy up to the %s, and print that */
3951                 strncpy(iter->fmt, p, i);
3952                 iter->fmt[i] = '\0';
3953                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3954
3955                 /*
3956                  * If iter->seq is full, the above call no longer guarantees
3957                  * that ap is in sync with fmt processing, and further calls
3958                  * to va_arg() can return wrong positional arguments.
3959                  *
3960                  * Ensure that ap is no longer used in this case.
3961                  */
3962                 if (iter->seq.full) {
3963                         p = "";
3964                         break;
3965                 }
3966
3967                 if (star)
3968                         len = va_arg(ap, int);
3969
3970                 /* The ap now points to the string data of the %s */
3971                 str = va_arg(ap, const char *);
3972
3973                 /*
3974                  * If you hit this warning, it is likely that the
3975                  * trace event in question used %s on a string that
3976                  * was saved at the time of the event, but may not be
3977                  * around when the trace is read. Use __string(),
3978                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3979                  * instead. See samples/trace_events/trace-events-sample.h
3980                  * for reference.
3981                  */
3982                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3983                               "fmt: '%s' current_buffer: '%s'",
3984                               fmt, seq_buf_str(&iter->seq.seq))) {
3985                         int ret;
3986
3987                         /* Try to safely read the string */
3988                         if (star) {
3989                                 if (len + 1 > iter->fmt_size)
3990                                         len = iter->fmt_size - 1;
3991                                 if (len < 0)
3992                                         len = 0;
3993                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3994                                 iter->fmt[len] = 0;
3995                                 star = false;
3996                         } else {
3997                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3998                                                                   iter->fmt_size);
3999                         }
4000                         if (ret < 0)
4001                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
4002                         else
4003                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
4004                                                  str, iter->fmt);
4005                         str = "[UNSAFE-MEMORY]";
4006                         strcpy(iter->fmt, "%s");
4007                 } else {
4008                         strncpy(iter->fmt, p + i, j + 1);
4009                         iter->fmt[j+1] = '\0';
4010                 }
4011                 if (star)
4012                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4013                 else
4014                         trace_seq_printf(&iter->seq, iter->fmt, str);
4015
4016                 p += i + j + 1;
4017         }
4018  print:
4019         if (*p)
4020                 trace_seq_vprintf(&iter->seq, p, ap);
4021 }
4022
4023 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4024 {
4025         const char *p, *new_fmt;
4026         char *q;
4027
4028         if (WARN_ON_ONCE(!fmt))
4029                 return fmt;
4030
4031         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4032                 return fmt;
4033
4034         p = fmt;
4035         new_fmt = q = iter->fmt;
4036         while (*p) {
4037                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4038                         if (!trace_iter_expand_format(iter))
4039                                 return fmt;
4040
4041                         q += iter->fmt - new_fmt;
4042                         new_fmt = iter->fmt;
4043                 }
4044
4045                 *q++ = *p++;
4046
4047                 /* Replace %p with %px */
4048                 if (p[-1] == '%') {
4049                         if (p[0] == '%') {
4050                                 *q++ = *p++;
4051                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4052                                 *q++ = *p++;
4053                                 *q++ = 'x';
4054                         }
4055                 }
4056         }
4057         *q = '\0';
4058
4059         return new_fmt;
4060 }
4061
4062 #define STATIC_TEMP_BUF_SIZE    128
4063 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4064
4065 /* Find the next real entry, without updating the iterator itself */
4066 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4067                                           int *ent_cpu, u64 *ent_ts)
4068 {
4069         /* __find_next_entry will reset ent_size */
4070         int ent_size = iter->ent_size;
4071         struct trace_entry *entry;
4072
4073         /*
4074          * If called from ftrace_dump(), then the iter->temp buffer
4075          * will be the static_temp_buf and not created from kmalloc.
4076          * If the entry size is greater than the buffer, we can
4077          * not save it. Just return NULL in that case. This is only
4078          * used to add markers when two consecutive events' time
4079          * stamps have a large delta. See trace_print_lat_context()
4080          */
4081         if (iter->temp == static_temp_buf &&
4082             STATIC_TEMP_BUF_SIZE < ent_size)
4083                 return NULL;
4084
4085         /*
4086          * The __find_next_entry() may call peek_next_entry(), which may
4087          * call ring_buffer_peek() that may make the contents of iter->ent
4088          * undefined. Need to copy iter->ent now.
4089          */
4090         if (iter->ent && iter->ent != iter->temp) {
4091                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4092                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4093                         void *temp;
4094                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4095                         if (!temp)
4096                                 return NULL;
4097                         kfree(iter->temp);
4098                         iter->temp = temp;
4099                         iter->temp_size = iter->ent_size;
4100                 }
4101                 memcpy(iter->temp, iter->ent, iter->ent_size);
4102                 iter->ent = iter->temp;
4103         }
4104         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4105         /* Put back the original ent_size */
4106         iter->ent_size = ent_size;
4107
4108         return entry;
4109 }
4110
4111 /* Find the next real entry, and increment the iterator to the next entry */
4112 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4113 {
4114         iter->ent = __find_next_entry(iter, &iter->cpu,
4115                                       &iter->lost_events, &iter->ts);
4116
4117         if (iter->ent)
4118                 trace_iterator_increment(iter);
4119
4120         return iter->ent ? iter : NULL;
4121 }
4122
4123 static void trace_consume(struct trace_iterator *iter)
4124 {
4125         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4126                             &iter->lost_events);
4127 }
4128
4129 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4130 {
4131         struct trace_iterator *iter = m->private;
4132         int i = (int)*pos;
4133         void *ent;
4134
4135         WARN_ON_ONCE(iter->leftover);
4136
4137         (*pos)++;
4138
4139         /* can't go backwards */
4140         if (iter->idx > i)
4141                 return NULL;
4142
4143         if (iter->idx < 0)
4144                 ent = trace_find_next_entry_inc(iter);
4145         else
4146                 ent = iter;
4147
4148         while (ent && iter->idx < i)
4149                 ent = trace_find_next_entry_inc(iter);
4150
4151         iter->pos = *pos;
4152
4153         return ent;
4154 }
4155
4156 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4157 {
4158         struct ring_buffer_iter *buf_iter;
4159         unsigned long entries = 0;
4160         u64 ts;
4161
4162         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4163
4164         buf_iter = trace_buffer_iter(iter, cpu);
4165         if (!buf_iter)
4166                 return;
4167
4168         ring_buffer_iter_reset(buf_iter);
4169
4170         /*
4171          * We could have the case with the max latency tracers
4172          * that a reset never took place on a cpu. This is evident
4173          * by the timestamp being before the start of the buffer.
4174          */
4175         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4176                 if (ts >= iter->array_buffer->time_start)
4177                         break;
4178                 entries++;
4179                 ring_buffer_iter_advance(buf_iter);
4180         }
4181
4182         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4183 }
4184
4185 /*
4186  * The current tracer is copied to avoid a global locking
4187  * all around.
4188  */
4189 static void *s_start(struct seq_file *m, loff_t *pos)
4190 {
4191         struct trace_iterator *iter = m->private;
4192         struct trace_array *tr = iter->tr;
4193         int cpu_file = iter->cpu_file;
4194         void *p = NULL;
4195         loff_t l = 0;
4196         int cpu;
4197
4198         mutex_lock(&trace_types_lock);
4199         if (unlikely(tr->current_trace != iter->trace)) {
4200                 /* Close iter->trace before switching to the new current tracer */
4201                 if (iter->trace->close)
4202                         iter->trace->close(iter);
4203                 iter->trace = tr->current_trace;
4204                 /* Reopen the new current tracer */
4205                 if (iter->trace->open)
4206                         iter->trace->open(iter);
4207         }
4208         mutex_unlock(&trace_types_lock);
4209
4210 #ifdef CONFIG_TRACER_MAX_TRACE
4211         if (iter->snapshot && iter->trace->use_max_tr)
4212                 return ERR_PTR(-EBUSY);
4213 #endif
4214
4215         if (*pos != iter->pos) {
4216                 iter->ent = NULL;
4217                 iter->cpu = 0;
4218                 iter->idx = -1;
4219
4220                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4221                         for_each_tracing_cpu(cpu)
4222                                 tracing_iter_reset(iter, cpu);
4223                 } else
4224                         tracing_iter_reset(iter, cpu_file);
4225
4226                 iter->leftover = 0;
4227                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4228                         ;
4229
4230         } else {
4231                 /*
4232                  * If we overflowed the seq_file before, then we want
4233                  * to just reuse the trace_seq buffer again.
4234                  */
4235                 if (iter->leftover)
4236                         p = iter;
4237                 else {
4238                         l = *pos - 1;
4239                         p = s_next(m, p, &l);
4240                 }
4241         }
4242
4243         trace_event_read_lock();
4244         trace_access_lock(cpu_file);
4245         return p;
4246 }
4247
4248 static void s_stop(struct seq_file *m, void *p)
4249 {
4250         struct trace_iterator *iter = m->private;
4251
4252 #ifdef CONFIG_TRACER_MAX_TRACE
4253         if (iter->snapshot && iter->trace->use_max_tr)
4254                 return;
4255 #endif
4256
4257         trace_access_unlock(iter->cpu_file);
4258         trace_event_read_unlock();
4259 }
4260
4261 static void
4262 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4263                       unsigned long *entries, int cpu)
4264 {
4265         unsigned long count;
4266
4267         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4268         /*
4269          * If this buffer has skipped entries, then we hold all
4270          * entries for the trace and we need to ignore the
4271          * ones before the time stamp.
4272          */
4273         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4274                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4275                 /* total is the same as the entries */
4276                 *total = count;
4277         } else
4278                 *total = count +
4279                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4280         *entries = count;
4281 }
4282
4283 static void
4284 get_total_entries(struct array_buffer *buf,
4285                   unsigned long *total, unsigned long *entries)
4286 {
4287         unsigned long t, e;
4288         int cpu;
4289
4290         *total = 0;
4291         *entries = 0;
4292
4293         for_each_tracing_cpu(cpu) {
4294                 get_total_entries_cpu(buf, &t, &e, cpu);
4295                 *total += t;
4296                 *entries += e;
4297         }
4298 }
4299
4300 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4301 {
4302         unsigned long total, entries;
4303
4304         if (!tr)
4305                 tr = &global_trace;
4306
4307         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4308
4309         return entries;
4310 }
4311
4312 unsigned long trace_total_entries(struct trace_array *tr)
4313 {
4314         unsigned long total, entries;
4315
4316         if (!tr)
4317                 tr = &global_trace;
4318
4319         get_total_entries(&tr->array_buffer, &total, &entries);
4320
4321         return entries;
4322 }
4323
4324 static void print_lat_help_header(struct seq_file *m)
4325 {
4326         seq_puts(m, "#                    _------=> CPU#            \n"
4327                     "#                   / _-----=> irqs-off/BH-disabled\n"
4328                     "#                  | / _----=> need-resched    \n"
4329                     "#                  || / _---=> hardirq/softirq \n"
4330                     "#                  ||| / _--=> preempt-depth   \n"
4331                     "#                  |||| / _-=> migrate-disable \n"
4332                     "#                  ||||| /     delay           \n"
4333                     "#  cmd     pid     |||||| time  |   caller     \n"
4334                     "#     \\   /        ||||||  \\    |    /       \n");
4335 }
4336
4337 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4338 {
4339         unsigned long total;
4340         unsigned long entries;
4341
4342         get_total_entries(buf, &total, &entries);
4343         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4344                    entries, total, num_online_cpus());
4345         seq_puts(m, "#\n");
4346 }
4347
4348 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4349                                    unsigned int flags)
4350 {
4351         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4352
4353         print_event_info(buf, m);
4354
4355         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4356         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4357 }
4358
4359 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4360                                        unsigned int flags)
4361 {
4362         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4363         static const char space[] = "            ";
4364         int prec = tgid ? 12 : 2;
4365
4366         print_event_info(buf, m);
4367
4368         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4369         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4370         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4371         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4372         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4373         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4374         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4375         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4376 }
4377
4378 void
4379 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4380 {
4381         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4382         struct array_buffer *buf = iter->array_buffer;
4383         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4384         struct tracer *type = iter->trace;
4385         unsigned long entries;
4386         unsigned long total;
4387         const char *name = type->name;
4388
4389         get_total_entries(buf, &total, &entries);
4390
4391         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4392                    name, UTS_RELEASE);
4393         seq_puts(m, "# -----------------------------------"
4394                  "---------------------------------\n");
4395         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4396                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4397                    nsecs_to_usecs(data->saved_latency),
4398                    entries,
4399                    total,
4400                    buf->cpu,
4401                    preempt_model_none()      ? "server" :
4402                    preempt_model_voluntary() ? "desktop" :
4403                    preempt_model_full()      ? "preempt" :
4404                    preempt_model_rt()        ? "preempt_rt" :
4405                    "unknown",
4406                    /* These are reserved for later use */
4407                    0, 0, 0, 0);
4408 #ifdef CONFIG_SMP
4409         seq_printf(m, " #P:%d)\n", num_online_cpus());
4410 #else
4411         seq_puts(m, ")\n");
4412 #endif
4413         seq_puts(m, "#    -----------------\n");
4414         seq_printf(m, "#    | task: %.16s-%d "
4415                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4416                    data->comm, data->pid,
4417                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4418                    data->policy, data->rt_priority);
4419         seq_puts(m, "#    -----------------\n");
4420
4421         if (data->critical_start) {
4422                 seq_puts(m, "#  => started at: ");
4423                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4424                 trace_print_seq(m, &iter->seq);
4425                 seq_puts(m, "\n#  => ended at:   ");
4426                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4427                 trace_print_seq(m, &iter->seq);
4428                 seq_puts(m, "\n#\n");
4429         }
4430
4431         seq_puts(m, "#\n");
4432 }
4433
4434 static void test_cpu_buff_start(struct trace_iterator *iter)
4435 {
4436         struct trace_seq *s = &iter->seq;
4437         struct trace_array *tr = iter->tr;
4438
4439         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4440                 return;
4441
4442         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4443                 return;
4444
4445         if (cpumask_available(iter->started) &&
4446             cpumask_test_cpu(iter->cpu, iter->started))
4447                 return;
4448
4449         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4450                 return;
4451
4452         if (cpumask_available(iter->started))
4453                 cpumask_set_cpu(iter->cpu, iter->started);
4454
4455         /* Don't print started cpu buffer for the first entry of the trace */
4456         if (iter->idx > 1)
4457                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4458                                 iter->cpu);
4459 }
4460
4461 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4462 {
4463         struct trace_array *tr = iter->tr;
4464         struct trace_seq *s = &iter->seq;
4465         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4466         struct trace_entry *entry;
4467         struct trace_event *event;
4468
4469         entry = iter->ent;
4470
4471         test_cpu_buff_start(iter);
4472
4473         event = ftrace_find_event(entry->type);
4474
4475         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4476                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4477                         trace_print_lat_context(iter);
4478                 else
4479                         trace_print_context(iter);
4480         }
4481
4482         if (trace_seq_has_overflowed(s))
4483                 return TRACE_TYPE_PARTIAL_LINE;
4484
4485         if (event) {
4486                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4487                         return print_event_fields(iter, event);
4488                 return event->funcs->trace(iter, sym_flags, event);
4489         }
4490
4491         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4492
4493         return trace_handle_return(s);
4494 }
4495
4496 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4497 {
4498         struct trace_array *tr = iter->tr;
4499         struct trace_seq *s = &iter->seq;
4500         struct trace_entry *entry;
4501         struct trace_event *event;
4502
4503         entry = iter->ent;
4504
4505         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4506                 trace_seq_printf(s, "%d %d %llu ",
4507                                  entry->pid, iter->cpu, iter->ts);
4508
4509         if (trace_seq_has_overflowed(s))
4510                 return TRACE_TYPE_PARTIAL_LINE;
4511
4512         event = ftrace_find_event(entry->type);
4513         if (event)
4514                 return event->funcs->raw(iter, 0, event);
4515
4516         trace_seq_printf(s, "%d ?\n", entry->type);
4517
4518         return trace_handle_return(s);
4519 }
4520
4521 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4522 {
4523         struct trace_array *tr = iter->tr;
4524         struct trace_seq *s = &iter->seq;
4525         unsigned char newline = '\n';
4526         struct trace_entry *entry;
4527         struct trace_event *event;
4528
4529         entry = iter->ent;
4530
4531         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4532                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4533                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4534                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4535                 if (trace_seq_has_overflowed(s))
4536                         return TRACE_TYPE_PARTIAL_LINE;
4537         }
4538
4539         event = ftrace_find_event(entry->type);
4540         if (event) {
4541                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4542                 if (ret != TRACE_TYPE_HANDLED)
4543                         return ret;
4544         }
4545
4546         SEQ_PUT_FIELD(s, newline);
4547
4548         return trace_handle_return(s);
4549 }
4550
4551 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4552 {
4553         struct trace_array *tr = iter->tr;
4554         struct trace_seq *s = &iter->seq;
4555         struct trace_entry *entry;
4556         struct trace_event *event;
4557
4558         entry = iter->ent;
4559
4560         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4561                 SEQ_PUT_FIELD(s, entry->pid);
4562                 SEQ_PUT_FIELD(s, iter->cpu);
4563                 SEQ_PUT_FIELD(s, iter->ts);
4564                 if (trace_seq_has_overflowed(s))
4565                         return TRACE_TYPE_PARTIAL_LINE;
4566         }
4567
4568         event = ftrace_find_event(entry->type);
4569         return event ? event->funcs->binary(iter, 0, event) :
4570                 TRACE_TYPE_HANDLED;
4571 }
4572
4573 int trace_empty(struct trace_iterator *iter)
4574 {
4575         struct ring_buffer_iter *buf_iter;
4576         int cpu;
4577
4578         /* If we are looking at one CPU buffer, only check that one */
4579         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4580                 cpu = iter->cpu_file;
4581                 buf_iter = trace_buffer_iter(iter, cpu);
4582                 if (buf_iter) {
4583                         if (!ring_buffer_iter_empty(buf_iter))
4584                                 return 0;
4585                 } else {
4586                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4587                                 return 0;
4588                 }
4589                 return 1;
4590         }
4591
4592         for_each_tracing_cpu(cpu) {
4593                 buf_iter = trace_buffer_iter(iter, cpu);
4594                 if (buf_iter) {
4595                         if (!ring_buffer_iter_empty(buf_iter))
4596                                 return 0;
4597                 } else {
4598                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4599                                 return 0;
4600                 }
4601         }
4602
4603         return 1;
4604 }
4605
4606 /*  Called with trace_event_read_lock() held. */
4607 enum print_line_t print_trace_line(struct trace_iterator *iter)
4608 {
4609         struct trace_array *tr = iter->tr;
4610         unsigned long trace_flags = tr->trace_flags;
4611         enum print_line_t ret;
4612
4613         if (iter->lost_events) {
4614                 if (iter->lost_events == (unsigned long)-1)
4615                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4616                                          iter->cpu);
4617                 else
4618                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4619                                          iter->cpu, iter->lost_events);
4620                 if (trace_seq_has_overflowed(&iter->seq))
4621                         return TRACE_TYPE_PARTIAL_LINE;
4622         }
4623
4624         if (iter->trace && iter->trace->print_line) {
4625                 ret = iter->trace->print_line(iter);
4626                 if (ret != TRACE_TYPE_UNHANDLED)
4627                         return ret;
4628         }
4629
4630         if (iter->ent->type == TRACE_BPUTS &&
4631                         trace_flags & TRACE_ITER_PRINTK &&
4632                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4633                 return trace_print_bputs_msg_only(iter);
4634
4635         if (iter->ent->type == TRACE_BPRINT &&
4636                         trace_flags & TRACE_ITER_PRINTK &&
4637                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4638                 return trace_print_bprintk_msg_only(iter);
4639
4640         if (iter->ent->type == TRACE_PRINT &&
4641                         trace_flags & TRACE_ITER_PRINTK &&
4642                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4643                 return trace_print_printk_msg_only(iter);
4644
4645         if (trace_flags & TRACE_ITER_BIN)
4646                 return print_bin_fmt(iter);
4647
4648         if (trace_flags & TRACE_ITER_HEX)
4649                 return print_hex_fmt(iter);
4650
4651         if (trace_flags & TRACE_ITER_RAW)
4652                 return print_raw_fmt(iter);
4653
4654         return print_trace_fmt(iter);
4655 }
4656
4657 void trace_latency_header(struct seq_file *m)
4658 {
4659         struct trace_iterator *iter = m->private;
4660         struct trace_array *tr = iter->tr;
4661
4662         /* print nothing if the buffers are empty */
4663         if (trace_empty(iter))
4664                 return;
4665
4666         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4667                 print_trace_header(m, iter);
4668
4669         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4670                 print_lat_help_header(m);
4671 }
4672
4673 void trace_default_header(struct seq_file *m)
4674 {
4675         struct trace_iterator *iter = m->private;
4676         struct trace_array *tr = iter->tr;
4677         unsigned long trace_flags = tr->trace_flags;
4678
4679         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4680                 return;
4681
4682         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4683                 /* print nothing if the buffers are empty */
4684                 if (trace_empty(iter))
4685                         return;
4686                 print_trace_header(m, iter);
4687                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4688                         print_lat_help_header(m);
4689         } else {
4690                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4691                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4692                                 print_func_help_header_irq(iter->array_buffer,
4693                                                            m, trace_flags);
4694                         else
4695                                 print_func_help_header(iter->array_buffer, m,
4696                                                        trace_flags);
4697                 }
4698         }
4699 }
4700
4701 static void test_ftrace_alive(struct seq_file *m)
4702 {
4703         if (!ftrace_is_dead())
4704                 return;
4705         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4706                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4707 }
4708
4709 #ifdef CONFIG_TRACER_MAX_TRACE
4710 static void show_snapshot_main_help(struct seq_file *m)
4711 {
4712         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4713                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4714                     "#                      Takes a snapshot of the main buffer.\n"
4715                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4716                     "#                      (Doesn't have to be '2' works with any number that\n"
4717                     "#                       is not a '0' or '1')\n");
4718 }
4719
4720 static void show_snapshot_percpu_help(struct seq_file *m)
4721 {
4722         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4723 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4724         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4725                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4726 #else
4727         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4728                     "#                     Must use main snapshot file to allocate.\n");
4729 #endif
4730         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4731                     "#                      (Doesn't have to be '2' works with any number that\n"
4732                     "#                       is not a '0' or '1')\n");
4733 }
4734
4735 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4736 {
4737         if (iter->tr->allocated_snapshot)
4738                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4739         else
4740                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4741
4742         seq_puts(m, "# Snapshot commands:\n");
4743         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4744                 show_snapshot_main_help(m);
4745         else
4746                 show_snapshot_percpu_help(m);
4747 }
4748 #else
4749 /* Should never be called */
4750 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4751 #endif
4752
4753 static int s_show(struct seq_file *m, void *v)
4754 {
4755         struct trace_iterator *iter = v;
4756         int ret;
4757
4758         if (iter->ent == NULL) {
4759                 if (iter->tr) {
4760                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4761                         seq_puts(m, "#\n");
4762                         test_ftrace_alive(m);
4763                 }
4764                 if (iter->snapshot && trace_empty(iter))
4765                         print_snapshot_help(m, iter);
4766                 else if (iter->trace && iter->trace->print_header)
4767                         iter->trace->print_header(m);
4768                 else
4769                         trace_default_header(m);
4770
4771         } else if (iter->leftover) {
4772                 /*
4773                  * If we filled the seq_file buffer earlier, we
4774                  * want to just show it now.
4775                  */
4776                 ret = trace_print_seq(m, &iter->seq);
4777
4778                 /* ret should this time be zero, but you never know */
4779                 iter->leftover = ret;
4780
4781         } else {
4782                 ret = print_trace_line(iter);
4783                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4784                         iter->seq.full = 0;
4785                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4786                 }
4787                 ret = trace_print_seq(m, &iter->seq);
4788                 /*
4789                  * If we overflow the seq_file buffer, then it will
4790                  * ask us for this data again at start up.
4791                  * Use that instead.
4792                  *  ret is 0 if seq_file write succeeded.
4793                  *        -1 otherwise.
4794                  */
4795                 iter->leftover = ret;
4796         }
4797
4798         return 0;
4799 }
4800
4801 /*
4802  * Should be used after trace_array_get(), trace_types_lock
4803  * ensures that i_cdev was already initialized.
4804  */
4805 static inline int tracing_get_cpu(struct inode *inode)
4806 {
4807         if (inode->i_cdev) /* See trace_create_cpu_file() */
4808                 return (long)inode->i_cdev - 1;
4809         return RING_BUFFER_ALL_CPUS;
4810 }
4811
4812 static const struct seq_operations tracer_seq_ops = {
4813         .start          = s_start,
4814         .next           = s_next,
4815         .stop           = s_stop,
4816         .show           = s_show,
4817 };
4818
4819 /*
4820  * Note, as iter itself can be allocated and freed in different
4821  * ways, this function is only used to free its content, and not
4822  * the iterator itself. The only requirement to all the allocations
4823  * is that it must zero all fields (kzalloc), as freeing works with
4824  * ethier allocated content or NULL.
4825  */
4826 static void free_trace_iter_content(struct trace_iterator *iter)
4827 {
4828         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4829         if (iter->fmt != static_fmt_buf)
4830                 kfree(iter->fmt);
4831
4832         kfree(iter->temp);
4833         kfree(iter->buffer_iter);
4834         mutex_destroy(&iter->mutex);
4835         free_cpumask_var(iter->started);
4836 }
4837
4838 static struct trace_iterator *
4839 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4840 {
4841         struct trace_array *tr = inode->i_private;
4842         struct trace_iterator *iter;
4843         int cpu;
4844
4845         if (tracing_disabled)
4846                 return ERR_PTR(-ENODEV);
4847
4848         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4849         if (!iter)
4850                 return ERR_PTR(-ENOMEM);
4851
4852         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4853                                     GFP_KERNEL);
4854         if (!iter->buffer_iter)
4855                 goto release;
4856
4857         /*
4858          * trace_find_next_entry() may need to save off iter->ent.
4859          * It will place it into the iter->temp buffer. As most
4860          * events are less than 128, allocate a buffer of that size.
4861          * If one is greater, then trace_find_next_entry() will
4862          * allocate a new buffer to adjust for the bigger iter->ent.
4863          * It's not critical if it fails to get allocated here.
4864          */
4865         iter->temp = kmalloc(128, GFP_KERNEL);
4866         if (iter->temp)
4867                 iter->temp_size = 128;
4868
4869         /*
4870          * trace_event_printf() may need to modify given format
4871          * string to replace %p with %px so that it shows real address
4872          * instead of hash value. However, that is only for the event
4873          * tracing, other tracer may not need. Defer the allocation
4874          * until it is needed.
4875          */
4876         iter->fmt = NULL;
4877         iter->fmt_size = 0;
4878
4879         mutex_lock(&trace_types_lock);
4880         iter->trace = tr->current_trace;
4881
4882         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4883                 goto fail;
4884
4885         iter->tr = tr;
4886
4887 #ifdef CONFIG_TRACER_MAX_TRACE
4888         /* Currently only the top directory has a snapshot */
4889         if (tr->current_trace->print_max || snapshot)
4890                 iter->array_buffer = &tr->max_buffer;
4891         else
4892 #endif
4893                 iter->array_buffer = &tr->array_buffer;
4894         iter->snapshot = snapshot;
4895         iter->pos = -1;
4896         iter->cpu_file = tracing_get_cpu(inode);
4897         mutex_init(&iter->mutex);
4898
4899         /* Notify the tracer early; before we stop tracing. */
4900         if (iter->trace->open)
4901                 iter->trace->open(iter);
4902
4903         /* Annotate start of buffers if we had overruns */
4904         if (ring_buffer_overruns(iter->array_buffer->buffer))
4905                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4906
4907         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4908         if (trace_clocks[tr->clock_id].in_ns)
4909                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4910
4911         /*
4912          * If pause-on-trace is enabled, then stop the trace while
4913          * dumping, unless this is the "snapshot" file
4914          */
4915         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4916                 tracing_stop_tr(tr);
4917
4918         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4919                 for_each_tracing_cpu(cpu) {
4920                         iter->buffer_iter[cpu] =
4921                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4922                                                          cpu, GFP_KERNEL);
4923                 }
4924                 ring_buffer_read_prepare_sync();
4925                 for_each_tracing_cpu(cpu) {
4926                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4927                         tracing_iter_reset(iter, cpu);
4928                 }
4929         } else {
4930                 cpu = iter->cpu_file;
4931                 iter->buffer_iter[cpu] =
4932                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4933                                                  cpu, GFP_KERNEL);
4934                 ring_buffer_read_prepare_sync();
4935                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4936                 tracing_iter_reset(iter, cpu);
4937         }
4938
4939         mutex_unlock(&trace_types_lock);
4940
4941         return iter;
4942
4943  fail:
4944         mutex_unlock(&trace_types_lock);
4945         free_trace_iter_content(iter);
4946 release:
4947         seq_release_private(inode, file);
4948         return ERR_PTR(-ENOMEM);
4949 }
4950
4951 int tracing_open_generic(struct inode *inode, struct file *filp)
4952 {
4953         int ret;
4954
4955         ret = tracing_check_open_get_tr(NULL);
4956         if (ret)
4957                 return ret;
4958
4959         filp->private_data = inode->i_private;
4960         return 0;
4961 }
4962
4963 bool tracing_is_disabled(void)
4964 {
4965         return (tracing_disabled) ? true: false;
4966 }
4967
4968 /*
4969  * Open and update trace_array ref count.
4970  * Must have the current trace_array passed to it.
4971  */
4972 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4973 {
4974         struct trace_array *tr = inode->i_private;
4975         int ret;
4976
4977         ret = tracing_check_open_get_tr(tr);
4978         if (ret)
4979                 return ret;
4980
4981         filp->private_data = inode->i_private;
4982
4983         return 0;
4984 }
4985
4986 /*
4987  * The private pointer of the inode is the trace_event_file.
4988  * Update the tr ref count associated to it.
4989  */
4990 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4991 {
4992         struct trace_event_file *file = inode->i_private;
4993         int ret;
4994
4995         ret = tracing_check_open_get_tr(file->tr);
4996         if (ret)
4997                 return ret;
4998
4999         mutex_lock(&event_mutex);
5000
5001         /* Fail if the file is marked for removal */
5002         if (file->flags & EVENT_FILE_FL_FREED) {
5003                 trace_array_put(file->tr);
5004                 ret = -ENODEV;
5005         } else {
5006                 event_file_get(file);
5007         }
5008
5009         mutex_unlock(&event_mutex);
5010         if (ret)
5011                 return ret;
5012
5013         filp->private_data = inode->i_private;
5014
5015         return 0;
5016 }
5017
5018 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5019 {
5020         struct trace_event_file *file = inode->i_private;
5021
5022         trace_array_put(file->tr);
5023         event_file_put(file);
5024
5025         return 0;
5026 }
5027
5028 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5029 {
5030         tracing_release_file_tr(inode, filp);
5031         return single_release(inode, filp);
5032 }
5033
5034 static int tracing_mark_open(struct inode *inode, struct file *filp)
5035 {
5036         stream_open(inode, filp);
5037         return tracing_open_generic_tr(inode, filp);
5038 }
5039
5040 static int tracing_release(struct inode *inode, struct file *file)
5041 {
5042         struct trace_array *tr = inode->i_private;
5043         struct seq_file *m = file->private_data;
5044         struct trace_iterator *iter;
5045         int cpu;
5046
5047         if (!(file->f_mode & FMODE_READ)) {
5048                 trace_array_put(tr);
5049                 return 0;
5050         }
5051
5052         /* Writes do not use seq_file */
5053         iter = m->private;
5054         mutex_lock(&trace_types_lock);
5055
5056         for_each_tracing_cpu(cpu) {
5057                 if (iter->buffer_iter[cpu])
5058                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5059         }
5060
5061         if (iter->trace && iter->trace->close)
5062                 iter->trace->close(iter);
5063
5064         if (!iter->snapshot && tr->stop_count)
5065                 /* reenable tracing if it was previously enabled */
5066                 tracing_start_tr(tr);
5067
5068         __trace_array_put(tr);
5069
5070         mutex_unlock(&trace_types_lock);
5071
5072         free_trace_iter_content(iter);
5073         seq_release_private(inode, file);
5074
5075         return 0;
5076 }
5077
5078 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5079 {
5080         struct trace_array *tr = inode->i_private;
5081
5082         trace_array_put(tr);
5083         return 0;
5084 }
5085
5086 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5087 {
5088         struct trace_array *tr = inode->i_private;
5089
5090         trace_array_put(tr);
5091
5092         return single_release(inode, file);
5093 }
5094
5095 static int tracing_open(struct inode *inode, struct file *file)
5096 {
5097         struct trace_array *tr = inode->i_private;
5098         struct trace_iterator *iter;
5099         int ret;
5100
5101         ret = tracing_check_open_get_tr(tr);
5102         if (ret)
5103                 return ret;
5104
5105         /* If this file was open for write, then erase contents */
5106         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5107                 int cpu = tracing_get_cpu(inode);
5108                 struct array_buffer *trace_buf = &tr->array_buffer;
5109
5110 #ifdef CONFIG_TRACER_MAX_TRACE
5111                 if (tr->current_trace->print_max)
5112                         trace_buf = &tr->max_buffer;
5113 #endif
5114
5115                 if (cpu == RING_BUFFER_ALL_CPUS)
5116                         tracing_reset_online_cpus(trace_buf);
5117                 else
5118                         tracing_reset_cpu(trace_buf, cpu);
5119         }
5120
5121         if (file->f_mode & FMODE_READ) {
5122                 iter = __tracing_open(inode, file, false);
5123                 if (IS_ERR(iter))
5124                         ret = PTR_ERR(iter);
5125                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5126                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5127         }
5128
5129         if (ret < 0)
5130                 trace_array_put(tr);
5131
5132         return ret;
5133 }
5134
5135 /*
5136  * Some tracers are not suitable for instance buffers.
5137  * A tracer is always available for the global array (toplevel)
5138  * or if it explicitly states that it is.
5139  */
5140 static bool
5141 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5142 {
5143         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5144 }
5145
5146 /* Find the next tracer that this trace array may use */
5147 static struct tracer *
5148 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5149 {
5150         while (t && !trace_ok_for_array(t, tr))
5151                 t = t->next;
5152
5153         return t;
5154 }
5155
5156 static void *
5157 t_next(struct seq_file *m, void *v, loff_t *pos)
5158 {
5159         struct trace_array *tr = m->private;
5160         struct tracer *t = v;
5161
5162         (*pos)++;
5163
5164         if (t)
5165                 t = get_tracer_for_array(tr, t->next);
5166
5167         return t;
5168 }
5169
5170 static void *t_start(struct seq_file *m, loff_t *pos)
5171 {
5172         struct trace_array *tr = m->private;
5173         struct tracer *t;
5174         loff_t l = 0;
5175
5176         mutex_lock(&trace_types_lock);
5177
5178         t = get_tracer_for_array(tr, trace_types);
5179         for (; t && l < *pos; t = t_next(m, t, &l))
5180                         ;
5181
5182         return t;
5183 }
5184
5185 static void t_stop(struct seq_file *m, void *p)
5186 {
5187         mutex_unlock(&trace_types_lock);
5188 }
5189
5190 static int t_show(struct seq_file *m, void *v)
5191 {
5192         struct tracer *t = v;
5193
5194         if (!t)
5195                 return 0;
5196
5197         seq_puts(m, t->name);
5198         if (t->next)
5199                 seq_putc(m, ' ');
5200         else
5201                 seq_putc(m, '\n');
5202
5203         return 0;
5204 }
5205
5206 static const struct seq_operations show_traces_seq_ops = {
5207         .start          = t_start,
5208         .next           = t_next,
5209         .stop           = t_stop,
5210         .show           = t_show,
5211 };
5212
5213 static int show_traces_open(struct inode *inode, struct file *file)
5214 {
5215         struct trace_array *tr = inode->i_private;
5216         struct seq_file *m;
5217         int ret;
5218
5219         ret = tracing_check_open_get_tr(tr);
5220         if (ret)
5221                 return ret;
5222
5223         ret = seq_open(file, &show_traces_seq_ops);
5224         if (ret) {
5225                 trace_array_put(tr);
5226                 return ret;
5227         }
5228
5229         m = file->private_data;
5230         m->private = tr;
5231
5232         return 0;
5233 }
5234
5235 static int show_traces_release(struct inode *inode, struct file *file)
5236 {
5237         struct trace_array *tr = inode->i_private;
5238
5239         trace_array_put(tr);
5240         return seq_release(inode, file);
5241 }
5242
5243 static ssize_t
5244 tracing_write_stub(struct file *filp, const char __user *ubuf,
5245                    size_t count, loff_t *ppos)
5246 {
5247         return count;
5248 }
5249
5250 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5251 {
5252         int ret;
5253
5254         if (file->f_mode & FMODE_READ)
5255                 ret = seq_lseek(file, offset, whence);
5256         else
5257                 file->f_pos = ret = 0;
5258
5259         return ret;
5260 }
5261
5262 static const struct file_operations tracing_fops = {
5263         .open           = tracing_open,
5264         .read           = seq_read,
5265         .read_iter      = seq_read_iter,
5266         .splice_read    = copy_splice_read,
5267         .write          = tracing_write_stub,
5268         .llseek         = tracing_lseek,
5269         .release        = tracing_release,
5270 };
5271
5272 static const struct file_operations show_traces_fops = {
5273         .open           = show_traces_open,
5274         .read           = seq_read,
5275         .llseek         = seq_lseek,
5276         .release        = show_traces_release,
5277 };
5278
5279 static ssize_t
5280 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5281                      size_t count, loff_t *ppos)
5282 {
5283         struct trace_array *tr = file_inode(filp)->i_private;
5284         char *mask_str;
5285         int len;
5286
5287         len = snprintf(NULL, 0, "%*pb\n",
5288                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5289         mask_str = kmalloc(len, GFP_KERNEL);
5290         if (!mask_str)
5291                 return -ENOMEM;
5292
5293         len = snprintf(mask_str, len, "%*pb\n",
5294                        cpumask_pr_args(tr->tracing_cpumask));
5295         if (len >= count) {
5296                 count = -EINVAL;
5297                 goto out_err;
5298         }
5299         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5300
5301 out_err:
5302         kfree(mask_str);
5303
5304         return count;
5305 }
5306
5307 int tracing_set_cpumask(struct trace_array *tr,
5308                         cpumask_var_t tracing_cpumask_new)
5309 {
5310         int cpu;
5311
5312         if (!tr)
5313                 return -EINVAL;
5314
5315         local_irq_disable();
5316         arch_spin_lock(&tr->max_lock);
5317         for_each_tracing_cpu(cpu) {
5318                 /*
5319                  * Increase/decrease the disabled counter if we are
5320                  * about to flip a bit in the cpumask:
5321                  */
5322                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5323                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5324                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5325                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5328 #endif
5329                 }
5330                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5331                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5332                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5333                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5334 #ifdef CONFIG_TRACER_MAX_TRACE
5335                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5336 #endif
5337                 }
5338         }
5339         arch_spin_unlock(&tr->max_lock);
5340         local_irq_enable();
5341
5342         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5343
5344         return 0;
5345 }
5346
5347 static ssize_t
5348 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5349                       size_t count, loff_t *ppos)
5350 {
5351         struct trace_array *tr = file_inode(filp)->i_private;
5352         cpumask_var_t tracing_cpumask_new;
5353         int err;
5354
5355         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5356                 return -ENOMEM;
5357
5358         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5359         if (err)
5360                 goto err_free;
5361
5362         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5363         if (err)
5364                 goto err_free;
5365
5366         free_cpumask_var(tracing_cpumask_new);
5367
5368         return count;
5369
5370 err_free:
5371         free_cpumask_var(tracing_cpumask_new);
5372
5373         return err;
5374 }
5375
5376 static const struct file_operations tracing_cpumask_fops = {
5377         .open           = tracing_open_generic_tr,
5378         .read           = tracing_cpumask_read,
5379         .write          = tracing_cpumask_write,
5380         .release        = tracing_release_generic_tr,
5381         .llseek         = generic_file_llseek,
5382 };
5383
5384 static int tracing_trace_options_show(struct seq_file *m, void *v)
5385 {
5386         struct tracer_opt *trace_opts;
5387         struct trace_array *tr = m->private;
5388         u32 tracer_flags;
5389         int i;
5390
5391         mutex_lock(&trace_types_lock);
5392         tracer_flags = tr->current_trace->flags->val;
5393         trace_opts = tr->current_trace->flags->opts;
5394
5395         for (i = 0; trace_options[i]; i++) {
5396                 if (tr->trace_flags & (1 << i))
5397                         seq_printf(m, "%s\n", trace_options[i]);
5398                 else
5399                         seq_printf(m, "no%s\n", trace_options[i]);
5400         }
5401
5402         for (i = 0; trace_opts[i].name; i++) {
5403                 if (tracer_flags & trace_opts[i].bit)
5404                         seq_printf(m, "%s\n", trace_opts[i].name);
5405                 else
5406                         seq_printf(m, "no%s\n", trace_opts[i].name);
5407         }
5408         mutex_unlock(&trace_types_lock);
5409
5410         return 0;
5411 }
5412
5413 static int __set_tracer_option(struct trace_array *tr,
5414                                struct tracer_flags *tracer_flags,
5415                                struct tracer_opt *opts, int neg)
5416 {
5417         struct tracer *trace = tracer_flags->trace;
5418         int ret;
5419
5420         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5421         if (ret)
5422                 return ret;
5423
5424         if (neg)
5425                 tracer_flags->val &= ~opts->bit;
5426         else
5427                 tracer_flags->val |= opts->bit;
5428         return 0;
5429 }
5430
5431 /* Try to assign a tracer specific option */
5432 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5433 {
5434         struct tracer *trace = tr->current_trace;
5435         struct tracer_flags *tracer_flags = trace->flags;
5436         struct tracer_opt *opts = NULL;
5437         int i;
5438
5439         for (i = 0; tracer_flags->opts[i].name; i++) {
5440                 opts = &tracer_flags->opts[i];
5441
5442                 if (strcmp(cmp, opts->name) == 0)
5443                         return __set_tracer_option(tr, trace->flags, opts, neg);
5444         }
5445
5446         return -EINVAL;
5447 }
5448
5449 /* Some tracers require overwrite to stay enabled */
5450 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5451 {
5452         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5453                 return -1;
5454
5455         return 0;
5456 }
5457
5458 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5459 {
5460         int *map;
5461
5462         if ((mask == TRACE_ITER_RECORD_TGID) ||
5463             (mask == TRACE_ITER_RECORD_CMD))
5464                 lockdep_assert_held(&event_mutex);
5465
5466         /* do nothing if flag is already set */
5467         if (!!(tr->trace_flags & mask) == !!enabled)
5468                 return 0;
5469
5470         /* Give the tracer a chance to approve the change */
5471         if (tr->current_trace->flag_changed)
5472                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5473                         return -EINVAL;
5474
5475         if (enabled)
5476                 tr->trace_flags |= mask;
5477         else
5478                 tr->trace_flags &= ~mask;
5479
5480         if (mask == TRACE_ITER_RECORD_CMD)
5481                 trace_event_enable_cmd_record(enabled);
5482
5483         if (mask == TRACE_ITER_RECORD_TGID) {
5484                 if (!tgid_map) {
5485                         tgid_map_max = pid_max;
5486                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5487                                        GFP_KERNEL);
5488
5489                         /*
5490                          * Pairs with smp_load_acquire() in
5491                          * trace_find_tgid_ptr() to ensure that if it observes
5492                          * the tgid_map we just allocated then it also observes
5493                          * the corresponding tgid_map_max value.
5494                          */
5495                         smp_store_release(&tgid_map, map);
5496                 }
5497                 if (!tgid_map) {
5498                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5499                         return -ENOMEM;
5500                 }
5501
5502                 trace_event_enable_tgid_record(enabled);
5503         }
5504
5505         if (mask == TRACE_ITER_EVENT_FORK)
5506                 trace_event_follow_fork(tr, enabled);
5507
5508         if (mask == TRACE_ITER_FUNC_FORK)
5509                 ftrace_pid_follow_fork(tr, enabled);
5510
5511         if (mask == TRACE_ITER_OVERWRITE) {
5512                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5513 #ifdef CONFIG_TRACER_MAX_TRACE
5514                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5515 #endif
5516         }
5517
5518         if (mask == TRACE_ITER_PRINTK) {
5519                 trace_printk_start_stop_comm(enabled);
5520                 trace_printk_control(enabled);
5521         }
5522
5523         return 0;
5524 }
5525
5526 int trace_set_options(struct trace_array *tr, char *option)
5527 {
5528         char *cmp;
5529         int neg = 0;
5530         int ret;
5531         size_t orig_len = strlen(option);
5532         int len;
5533
5534         cmp = strstrip(option);
5535
5536         len = str_has_prefix(cmp, "no");
5537         if (len)
5538                 neg = 1;
5539
5540         cmp += len;
5541
5542         mutex_lock(&event_mutex);
5543         mutex_lock(&trace_types_lock);
5544
5545         ret = match_string(trace_options, -1, cmp);
5546         /* If no option could be set, test the specific tracer options */
5547         if (ret < 0)
5548                 ret = set_tracer_option(tr, cmp, neg);
5549         else
5550                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5551
5552         mutex_unlock(&trace_types_lock);
5553         mutex_unlock(&event_mutex);
5554
5555         /*
5556          * If the first trailing whitespace is replaced with '\0' by strstrip,
5557          * turn it back into a space.
5558          */
5559         if (orig_len > strlen(option))
5560                 option[strlen(option)] = ' ';
5561
5562         return ret;
5563 }
5564
5565 static void __init apply_trace_boot_options(void)
5566 {
5567         char *buf = trace_boot_options_buf;
5568         char *option;
5569
5570         while (true) {
5571                 option = strsep(&buf, ",");
5572
5573                 if (!option)
5574                         break;
5575
5576                 if (*option)
5577                         trace_set_options(&global_trace, option);
5578
5579                 /* Put back the comma to allow this to be called again */
5580                 if (buf)
5581                         *(buf - 1) = ',';
5582         }
5583 }
5584
5585 static ssize_t
5586 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5587                         size_t cnt, loff_t *ppos)
5588 {
5589         struct seq_file *m = filp->private_data;
5590         struct trace_array *tr = m->private;
5591         char buf[64];
5592         int ret;
5593
5594         if (cnt >= sizeof(buf))
5595                 return -EINVAL;
5596
5597         if (copy_from_user(buf, ubuf, cnt))
5598                 return -EFAULT;
5599
5600         buf[cnt] = 0;
5601
5602         ret = trace_set_options(tr, buf);
5603         if (ret < 0)
5604                 return ret;
5605
5606         *ppos += cnt;
5607
5608         return cnt;
5609 }
5610
5611 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5612 {
5613         struct trace_array *tr = inode->i_private;
5614         int ret;
5615
5616         ret = tracing_check_open_get_tr(tr);
5617         if (ret)
5618                 return ret;
5619
5620         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5621         if (ret < 0)
5622                 trace_array_put(tr);
5623
5624         return ret;
5625 }
5626
5627 static const struct file_operations tracing_iter_fops = {
5628         .open           = tracing_trace_options_open,
5629         .read           = seq_read,
5630         .llseek         = seq_lseek,
5631         .release        = tracing_single_release_tr,
5632         .write          = tracing_trace_options_write,
5633 };
5634
5635 static const char readme_msg[] =
5636         "tracing mini-HOWTO:\n\n"
5637         "# echo 0 > tracing_on : quick way to disable tracing\n"
5638         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5639         " Important files:\n"
5640         "  trace\t\t\t- The static contents of the buffer\n"
5641         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5642         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5643         "  current_tracer\t- function and latency tracers\n"
5644         "  available_tracers\t- list of configured tracers for current_tracer\n"
5645         "  error_log\t- error log for failed commands (that support it)\n"
5646         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5647         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5648         "  trace_clock\t\t- change the clock used to order events\n"
5649         "       local:   Per cpu clock but may not be synced across CPUs\n"
5650         "      global:   Synced across CPUs but slows tracing down.\n"
5651         "     counter:   Not a clock, but just an increment\n"
5652         "      uptime:   Jiffy counter from time of boot\n"
5653         "        perf:   Same clock that perf events use\n"
5654 #ifdef CONFIG_X86_64
5655         "     x86-tsc:   TSC cycle counter\n"
5656 #endif
5657         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5658         "       delta:   Delta difference against a buffer-wide timestamp\n"
5659         "    absolute:   Absolute (standalone) timestamp\n"
5660         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5661         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5662         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5663         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5664         "\t\t\t  Remove sub-buffer with rmdir\n"
5665         "  trace_options\t\t- Set format or modify how tracing happens\n"
5666         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5667         "\t\t\t  option name\n"
5668         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5669 #ifdef CONFIG_DYNAMIC_FTRACE
5670         "\n  available_filter_functions - list of functions that can be filtered on\n"
5671         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5672         "\t\t\t  functions\n"
5673         "\t     accepts: func_full_name or glob-matching-pattern\n"
5674         "\t     modules: Can select a group via module\n"
5675         "\t      Format: :mod:<module-name>\n"
5676         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5677         "\t    triggers: a command to perform when function is hit\n"
5678         "\t      Format: <function>:<trigger>[:count]\n"
5679         "\t     trigger: traceon, traceoff\n"
5680         "\t\t      enable_event:<system>:<event>\n"
5681         "\t\t      disable_event:<system>:<event>\n"
5682 #ifdef CONFIG_STACKTRACE
5683         "\t\t      stacktrace\n"
5684 #endif
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686         "\t\t      snapshot\n"
5687 #endif
5688         "\t\t      dump\n"
5689         "\t\t      cpudump\n"
5690         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5691         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5692         "\t     The first one will disable tracing every time do_fault is hit\n"
5693         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5694         "\t       The first time do trap is hit and it disables tracing, the\n"
5695         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5696         "\t       the counter will not decrement. It only decrements when the\n"
5697         "\t       trigger did work\n"
5698         "\t     To remove trigger without count:\n"
5699         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5700         "\t     To remove trigger with a count:\n"
5701         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5702         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5703         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5704         "\t    modules: Can select a group via module command :mod:\n"
5705         "\t    Does not accept triggers\n"
5706 #endif /* CONFIG_DYNAMIC_FTRACE */
5707 #ifdef CONFIG_FUNCTION_TRACER
5708         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5709         "\t\t    (function)\n"
5710         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5711         "\t\t    (function)\n"
5712 #endif
5713 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5714         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5715         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5716         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5717 #endif
5718 #ifdef CONFIG_TRACER_SNAPSHOT
5719         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5720         "\t\t\t  snapshot buffer. Read the contents for more\n"
5721         "\t\t\t  information\n"
5722 #endif
5723 #ifdef CONFIG_STACK_TRACER
5724         "  stack_trace\t\t- Shows the max stack trace when active\n"
5725         "  stack_max_size\t- Shows current max stack size that was traced\n"
5726         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5727         "\t\t\t  new trace)\n"
5728 #ifdef CONFIG_DYNAMIC_FTRACE
5729         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5730         "\t\t\t  traces\n"
5731 #endif
5732 #endif /* CONFIG_STACK_TRACER */
5733 #ifdef CONFIG_DYNAMIC_EVENTS
5734         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5735         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5736 #endif
5737 #ifdef CONFIG_KPROBE_EVENTS
5738         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5739         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5740 #endif
5741 #ifdef CONFIG_UPROBE_EVENTS
5742         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5743         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5744 #endif
5745 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5746     defined(CONFIG_FPROBE_EVENTS)
5747         "\t  accepts: event-definitions (one definition per line)\n"
5748 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5749         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5750         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5751 #endif
5752 #ifdef CONFIG_FPROBE_EVENTS
5753         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5754         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5755 #endif
5756 #ifdef CONFIG_HIST_TRIGGERS
5757         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5758 #endif
5759         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5760         "\t           -:[<group>/][<event>]\n"
5761 #ifdef CONFIG_KPROBE_EVENTS
5762         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5763   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5764 #endif
5765 #ifdef CONFIG_UPROBE_EVENTS
5766   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5767 #endif
5768         "\t     args: <name>=fetcharg[:type]\n"
5769         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5770 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5771 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5772         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5773         "\t           <argname>[->field[->field|.field...]],\n"
5774 #else
5775         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5776 #endif
5777 #else
5778         "\t           $stack<index>, $stack, $retval, $comm,\n"
5779 #endif
5780         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5781         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5782         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5783         "\t           symstr, <type>\\[<array-size>\\]\n"
5784 #ifdef CONFIG_HIST_TRIGGERS
5785         "\t    field: <stype> <name>;\n"
5786         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5787         "\t           [unsigned] char/int/long\n"
5788 #endif
5789         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5790         "\t            of the <attached-group>/<attached-event>.\n"
5791 #endif
5792         "  events/\t\t- Directory containing all trace event subsystems:\n"
5793         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5794         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5795         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5796         "\t\t\t  events\n"
5797         "      filter\t\t- If set, only events passing filter are traced\n"
5798         "  events/<system>/<event>/\t- Directory containing control files for\n"
5799         "\t\t\t  <event>:\n"
5800         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5801         "      filter\t\t- If set, only events passing filter are traced\n"
5802         "      trigger\t\t- If set, a command to perform when event is hit\n"
5803         "\t    Format: <trigger>[:count][if <filter>]\n"
5804         "\t   trigger: traceon, traceoff\n"
5805         "\t            enable_event:<system>:<event>\n"
5806         "\t            disable_event:<system>:<event>\n"
5807 #ifdef CONFIG_HIST_TRIGGERS
5808         "\t            enable_hist:<system>:<event>\n"
5809         "\t            disable_hist:<system>:<event>\n"
5810 #endif
5811 #ifdef CONFIG_STACKTRACE
5812         "\t\t    stacktrace\n"
5813 #endif
5814 #ifdef CONFIG_TRACER_SNAPSHOT
5815         "\t\t    snapshot\n"
5816 #endif
5817 #ifdef CONFIG_HIST_TRIGGERS
5818         "\t\t    hist (see below)\n"
5819 #endif
5820         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5821         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5822         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5823         "\t                  events/block/block_unplug/trigger\n"
5824         "\t   The first disables tracing every time block_unplug is hit.\n"
5825         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5826         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5827         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5828         "\t   Like function triggers, the counter is only decremented if it\n"
5829         "\t    enabled or disabled tracing.\n"
5830         "\t   To remove a trigger without a count:\n"
5831         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5832         "\t   To remove a trigger with a count:\n"
5833         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5834         "\t   Filters can be ignored when removing a trigger.\n"
5835 #ifdef CONFIG_HIST_TRIGGERS
5836         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5837         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5838         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5839         "\t            [:values=<field1[,field2,...]>]\n"
5840         "\t            [:sort=<field1[,field2,...]>]\n"
5841         "\t            [:size=#entries]\n"
5842         "\t            [:pause][:continue][:clear]\n"
5843         "\t            [:name=histname1]\n"
5844         "\t            [:nohitcount]\n"
5845         "\t            [:<handler>.<action>]\n"
5846         "\t            [if <filter>]\n\n"
5847         "\t    Note, special fields can be used as well:\n"
5848         "\t            common_timestamp - to record current timestamp\n"
5849         "\t            common_cpu - to record the CPU the event happened on\n"
5850         "\n"
5851         "\t    A hist trigger variable can be:\n"
5852         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5853         "\t        - a reference to another variable e.g. y=$x,\n"
5854         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5855         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5856         "\n"
5857         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5858         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5859         "\t    variable reference, field or numeric literal.\n"
5860         "\n"
5861         "\t    When a matching event is hit, an entry is added to a hash\n"
5862         "\t    table using the key(s) and value(s) named, and the value of a\n"
5863         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5864         "\t    correspond to fields in the event's format description.  Keys\n"
5865         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5866         "\t    Compound keys consisting of up to two fields can be specified\n"
5867         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5868         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5869         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5870         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5871         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5872         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5873         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5874         "\t    its histogram data will be shared with other triggers of the\n"
5875         "\t    same name, and trigger hits will update this common data.\n\n"
5876         "\t    Reading the 'hist' file for the event will dump the hash\n"
5877         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5878         "\t    triggers attached to an event, there will be a table for each\n"
5879         "\t    trigger in the output.  The table displayed for a named\n"
5880         "\t    trigger will be the same as any other instance having the\n"
5881         "\t    same name.  The default format used to display a given field\n"
5882         "\t    can be modified by appending any of the following modifiers\n"
5883         "\t    to the field name, as applicable:\n\n"
5884         "\t            .hex        display a number as a hex value\n"
5885         "\t            .sym        display an address as a symbol\n"
5886         "\t            .sym-offset display an address as a symbol and offset\n"
5887         "\t            .execname   display a common_pid as a program name\n"
5888         "\t            .syscall    display a syscall id as a syscall name\n"
5889         "\t            .log2       display log2 value rather than raw number\n"
5890         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5891         "\t            .usecs      display a common_timestamp in microseconds\n"
5892         "\t            .percent    display a number of percentage value\n"
5893         "\t            .graph      display a bar-graph of a value\n\n"
5894         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5895         "\t    trigger or to start a hist trigger but not log any events\n"
5896         "\t    until told to do so.  'continue' can be used to start or\n"
5897         "\t    restart a paused hist trigger.\n\n"
5898         "\t    The 'clear' parameter will clear the contents of a running\n"
5899         "\t    hist trigger and leave its current paused/active state\n"
5900         "\t    unchanged.\n\n"
5901         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5902         "\t    raw hitcount in the histogram.\n\n"
5903         "\t    The enable_hist and disable_hist triggers can be used to\n"
5904         "\t    have one event conditionally start and stop another event's\n"
5905         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5906         "\t    the enable_event and disable_event triggers.\n\n"
5907         "\t    Hist trigger handlers and actions are executed whenever a\n"
5908         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5909         "\t        <handler>.<action>\n\n"
5910         "\t    The available handlers are:\n\n"
5911         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5912         "\t        onmax(var)               - invoke if var exceeds current max\n"
5913         "\t        onchange(var)            - invoke action if var changes\n\n"
5914         "\t    The available actions are:\n\n"
5915         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5916         "\t        save(field,...)                      - save current event fields\n"
5917 #ifdef CONFIG_TRACER_SNAPSHOT
5918         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5919 #endif
5920 #ifdef CONFIG_SYNTH_EVENTS
5921         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5922         "\t  Write into this file to define/undefine new synthetic events.\n"
5923         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5924 #endif
5925 #endif
5926 ;
5927
5928 static ssize_t
5929 tracing_readme_read(struct file *filp, char __user *ubuf,
5930                        size_t cnt, loff_t *ppos)
5931 {
5932         return simple_read_from_buffer(ubuf, cnt, ppos,
5933                                         readme_msg, strlen(readme_msg));
5934 }
5935
5936 static const struct file_operations tracing_readme_fops = {
5937         .open           = tracing_open_generic,
5938         .read           = tracing_readme_read,
5939         .llseek         = generic_file_llseek,
5940 };
5941
5942 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5943 {
5944         int pid = ++(*pos);
5945
5946         return trace_find_tgid_ptr(pid);
5947 }
5948
5949 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5950 {
5951         int pid = *pos;
5952
5953         return trace_find_tgid_ptr(pid);
5954 }
5955
5956 static void saved_tgids_stop(struct seq_file *m, void *v)
5957 {
5958 }
5959
5960 static int saved_tgids_show(struct seq_file *m, void *v)
5961 {
5962         int *entry = (int *)v;
5963         int pid = entry - tgid_map;
5964         int tgid = *entry;
5965
5966         if (tgid == 0)
5967                 return SEQ_SKIP;
5968
5969         seq_printf(m, "%d %d\n", pid, tgid);
5970         return 0;
5971 }
5972
5973 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5974         .start          = saved_tgids_start,
5975         .stop           = saved_tgids_stop,
5976         .next           = saved_tgids_next,
5977         .show           = saved_tgids_show,
5978 };
5979
5980 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5981 {
5982         int ret;
5983
5984         ret = tracing_check_open_get_tr(NULL);
5985         if (ret)
5986                 return ret;
5987
5988         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5989 }
5990
5991
5992 static const struct file_operations tracing_saved_tgids_fops = {
5993         .open           = tracing_saved_tgids_open,
5994         .read           = seq_read,
5995         .llseek         = seq_lseek,
5996         .release        = seq_release,
5997 };
5998
5999 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
6000 {
6001         unsigned int *ptr = v;
6002
6003         if (*pos || m->count)
6004                 ptr++;
6005
6006         (*pos)++;
6007
6008         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
6009              ptr++) {
6010                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
6011                         continue;
6012
6013                 return ptr;
6014         }
6015
6016         return NULL;
6017 }
6018
6019 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6020 {
6021         void *v;
6022         loff_t l = 0;
6023
6024         preempt_disable();
6025         arch_spin_lock(&trace_cmdline_lock);
6026
6027         v = &savedcmd->map_cmdline_to_pid[0];
6028         while (l <= *pos) {
6029                 v = saved_cmdlines_next(m, v, &l);
6030                 if (!v)
6031                         return NULL;
6032         }
6033
6034         return v;
6035 }
6036
6037 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6038 {
6039         arch_spin_unlock(&trace_cmdline_lock);
6040         preempt_enable();
6041 }
6042
6043 static int saved_cmdlines_show(struct seq_file *m, void *v)
6044 {
6045         char buf[TASK_COMM_LEN];
6046         unsigned int *pid = v;
6047
6048         __trace_find_cmdline(*pid, buf);
6049         seq_printf(m, "%d %s\n", *pid, buf);
6050         return 0;
6051 }
6052
6053 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6054         .start          = saved_cmdlines_start,
6055         .next           = saved_cmdlines_next,
6056         .stop           = saved_cmdlines_stop,
6057         .show           = saved_cmdlines_show,
6058 };
6059
6060 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6061 {
6062         int ret;
6063
6064         ret = tracing_check_open_get_tr(NULL);
6065         if (ret)
6066                 return ret;
6067
6068         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6069 }
6070
6071 static const struct file_operations tracing_saved_cmdlines_fops = {
6072         .open           = tracing_saved_cmdlines_open,
6073         .read           = seq_read,
6074         .llseek         = seq_lseek,
6075         .release        = seq_release,
6076 };
6077
6078 static ssize_t
6079 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6080                                  size_t cnt, loff_t *ppos)
6081 {
6082         char buf[64];
6083         int r;
6084
6085         preempt_disable();
6086         arch_spin_lock(&trace_cmdline_lock);
6087         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6088         arch_spin_unlock(&trace_cmdline_lock);
6089         preempt_enable();
6090
6091         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6092 }
6093
6094 static int tracing_resize_saved_cmdlines(unsigned int val)
6095 {
6096         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6097
6098         s = allocate_cmdlines_buffer(val);
6099         if (!s)
6100                 return -ENOMEM;
6101
6102         preempt_disable();
6103         arch_spin_lock(&trace_cmdline_lock);
6104         savedcmd_temp = savedcmd;
6105         savedcmd = s;
6106         arch_spin_unlock(&trace_cmdline_lock);
6107         preempt_enable();
6108         free_saved_cmdlines_buffer(savedcmd_temp);
6109
6110         return 0;
6111 }
6112
6113 static ssize_t
6114 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6115                                   size_t cnt, loff_t *ppos)
6116 {
6117         unsigned long val;
6118         int ret;
6119
6120         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6121         if (ret)
6122                 return ret;
6123
6124         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
6125         if (!val || val > PID_MAX_DEFAULT)
6126                 return -EINVAL;
6127
6128         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6129         if (ret < 0)
6130                 return ret;
6131
6132         *ppos += cnt;
6133
6134         return cnt;
6135 }
6136
6137 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6138         .open           = tracing_open_generic,
6139         .read           = tracing_saved_cmdlines_size_read,
6140         .write          = tracing_saved_cmdlines_size_write,
6141 };
6142
6143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6144 static union trace_eval_map_item *
6145 update_eval_map(union trace_eval_map_item *ptr)
6146 {
6147         if (!ptr->map.eval_string) {
6148                 if (ptr->tail.next) {
6149                         ptr = ptr->tail.next;
6150                         /* Set ptr to the next real item (skip head) */
6151                         ptr++;
6152                 } else
6153                         return NULL;
6154         }
6155         return ptr;
6156 }
6157
6158 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6159 {
6160         union trace_eval_map_item *ptr = v;
6161
6162         /*
6163          * Paranoid! If ptr points to end, we don't want to increment past it.
6164          * This really should never happen.
6165          */
6166         (*pos)++;
6167         ptr = update_eval_map(ptr);
6168         if (WARN_ON_ONCE(!ptr))
6169                 return NULL;
6170
6171         ptr++;
6172         ptr = update_eval_map(ptr);
6173
6174         return ptr;
6175 }
6176
6177 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6178 {
6179         union trace_eval_map_item *v;
6180         loff_t l = 0;
6181
6182         mutex_lock(&trace_eval_mutex);
6183
6184         v = trace_eval_maps;
6185         if (v)
6186                 v++;
6187
6188         while (v && l < *pos) {
6189                 v = eval_map_next(m, v, &l);
6190         }
6191
6192         return v;
6193 }
6194
6195 static void eval_map_stop(struct seq_file *m, void *v)
6196 {
6197         mutex_unlock(&trace_eval_mutex);
6198 }
6199
6200 static int eval_map_show(struct seq_file *m, void *v)
6201 {
6202         union trace_eval_map_item *ptr = v;
6203
6204         seq_printf(m, "%s %ld (%s)\n",
6205                    ptr->map.eval_string, ptr->map.eval_value,
6206                    ptr->map.system);
6207
6208         return 0;
6209 }
6210
6211 static const struct seq_operations tracing_eval_map_seq_ops = {
6212         .start          = eval_map_start,
6213         .next           = eval_map_next,
6214         .stop           = eval_map_stop,
6215         .show           = eval_map_show,
6216 };
6217
6218 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6219 {
6220         int ret;
6221
6222         ret = tracing_check_open_get_tr(NULL);
6223         if (ret)
6224                 return ret;
6225
6226         return seq_open(filp, &tracing_eval_map_seq_ops);
6227 }
6228
6229 static const struct file_operations tracing_eval_map_fops = {
6230         .open           = tracing_eval_map_open,
6231         .read           = seq_read,
6232         .llseek         = seq_lseek,
6233         .release        = seq_release,
6234 };
6235
6236 static inline union trace_eval_map_item *
6237 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6238 {
6239         /* Return tail of array given the head */
6240         return ptr + ptr->head.length + 1;
6241 }
6242
6243 static void
6244 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6245                            int len)
6246 {
6247         struct trace_eval_map **stop;
6248         struct trace_eval_map **map;
6249         union trace_eval_map_item *map_array;
6250         union trace_eval_map_item *ptr;
6251
6252         stop = start + len;
6253
6254         /*
6255          * The trace_eval_maps contains the map plus a head and tail item,
6256          * where the head holds the module and length of array, and the
6257          * tail holds a pointer to the next list.
6258          */
6259         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6260         if (!map_array) {
6261                 pr_warn("Unable to allocate trace eval mapping\n");
6262                 return;
6263         }
6264
6265         mutex_lock(&trace_eval_mutex);
6266
6267         if (!trace_eval_maps)
6268                 trace_eval_maps = map_array;
6269         else {
6270                 ptr = trace_eval_maps;
6271                 for (;;) {
6272                         ptr = trace_eval_jmp_to_tail(ptr);
6273                         if (!ptr->tail.next)
6274                                 break;
6275                         ptr = ptr->tail.next;
6276
6277                 }
6278                 ptr->tail.next = map_array;
6279         }
6280         map_array->head.mod = mod;
6281         map_array->head.length = len;
6282         map_array++;
6283
6284         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6285                 map_array->map = **map;
6286                 map_array++;
6287         }
6288         memset(map_array, 0, sizeof(*map_array));
6289
6290         mutex_unlock(&trace_eval_mutex);
6291 }
6292
6293 static void trace_create_eval_file(struct dentry *d_tracer)
6294 {
6295         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6296                           NULL, &tracing_eval_map_fops);
6297 }
6298
6299 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6300 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6301 static inline void trace_insert_eval_map_file(struct module *mod,
6302                               struct trace_eval_map **start, int len) { }
6303 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6304
6305 static void trace_insert_eval_map(struct module *mod,
6306                                   struct trace_eval_map **start, int len)
6307 {
6308         struct trace_eval_map **map;
6309
6310         if (len <= 0)
6311                 return;
6312
6313         map = start;
6314
6315         trace_event_eval_update(map, len);
6316
6317         trace_insert_eval_map_file(mod, start, len);
6318 }
6319
6320 static ssize_t
6321 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6322                        size_t cnt, loff_t *ppos)
6323 {
6324         struct trace_array *tr = filp->private_data;
6325         char buf[MAX_TRACER_SIZE+2];
6326         int r;
6327
6328         mutex_lock(&trace_types_lock);
6329         r = sprintf(buf, "%s\n", tr->current_trace->name);
6330         mutex_unlock(&trace_types_lock);
6331
6332         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6333 }
6334
6335 int tracer_init(struct tracer *t, struct trace_array *tr)
6336 {
6337         tracing_reset_online_cpus(&tr->array_buffer);
6338         return t->init(tr);
6339 }
6340
6341 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6342 {
6343         int cpu;
6344
6345         for_each_tracing_cpu(cpu)
6346                 per_cpu_ptr(buf->data, cpu)->entries = val;
6347 }
6348
6349 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6350 {
6351         if (cpu == RING_BUFFER_ALL_CPUS) {
6352                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6353         } else {
6354                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6355         }
6356 }
6357
6358 #ifdef CONFIG_TRACER_MAX_TRACE
6359 /* resize @tr's buffer to the size of @size_tr's entries */
6360 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6361                                         struct array_buffer *size_buf, int cpu_id)
6362 {
6363         int cpu, ret = 0;
6364
6365         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6366                 for_each_tracing_cpu(cpu) {
6367                         ret = ring_buffer_resize(trace_buf->buffer,
6368                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6369                         if (ret < 0)
6370                                 break;
6371                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6372                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6373                 }
6374         } else {
6375                 ret = ring_buffer_resize(trace_buf->buffer,
6376                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6377                 if (ret == 0)
6378                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6379                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6380         }
6381
6382         return ret;
6383 }
6384 #endif /* CONFIG_TRACER_MAX_TRACE */
6385
6386 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6387                                         unsigned long size, int cpu)
6388 {
6389         int ret;
6390
6391         /*
6392          * If kernel or user changes the size of the ring buffer
6393          * we use the size that was given, and we can forget about
6394          * expanding it later.
6395          */
6396         trace_set_ring_buffer_expanded(tr);
6397
6398         /* May be called before buffers are initialized */
6399         if (!tr->array_buffer.buffer)
6400                 return 0;
6401
6402         /* Do not allow tracing while resizing ring buffer */
6403         tracing_stop_tr(tr);
6404
6405         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6406         if (ret < 0)
6407                 goto out_start;
6408
6409 #ifdef CONFIG_TRACER_MAX_TRACE
6410         if (!tr->allocated_snapshot)
6411                 goto out;
6412
6413         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6414         if (ret < 0) {
6415                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6416                                                      &tr->array_buffer, cpu);
6417                 if (r < 0) {
6418                         /*
6419                          * AARGH! We are left with different
6420                          * size max buffer!!!!
6421                          * The max buffer is our "snapshot" buffer.
6422                          * When a tracer needs a snapshot (one of the
6423                          * latency tracers), it swaps the max buffer
6424                          * with the saved snap shot. We succeeded to
6425                          * update the size of the main buffer, but failed to
6426                          * update the size of the max buffer. But when we tried
6427                          * to reset the main buffer to the original size, we
6428                          * failed there too. This is very unlikely to
6429                          * happen, but if it does, warn and kill all
6430                          * tracing.
6431                          */
6432                         WARN_ON(1);
6433                         tracing_disabled = 1;
6434                 }
6435                 goto out_start;
6436         }
6437
6438         update_buffer_entries(&tr->max_buffer, cpu);
6439
6440  out:
6441 #endif /* CONFIG_TRACER_MAX_TRACE */
6442
6443         update_buffer_entries(&tr->array_buffer, cpu);
6444  out_start:
6445         tracing_start_tr(tr);
6446         return ret;
6447 }
6448
6449 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6450                                   unsigned long size, int cpu_id)
6451 {
6452         int ret;
6453
6454         mutex_lock(&trace_types_lock);
6455
6456         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6457                 /* make sure, this cpu is enabled in the mask */
6458                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6459                         ret = -EINVAL;
6460                         goto out;
6461                 }
6462         }
6463
6464         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6465         if (ret < 0)
6466                 ret = -ENOMEM;
6467
6468 out:
6469         mutex_unlock(&trace_types_lock);
6470
6471         return ret;
6472 }
6473
6474
6475 /**
6476  * tracing_update_buffers - used by tracing facility to expand ring buffers
6477  * @tr: The tracing instance
6478  *
6479  * To save on memory when the tracing is never used on a system with it
6480  * configured in. The ring buffers are set to a minimum size. But once
6481  * a user starts to use the tracing facility, then they need to grow
6482  * to their default size.
6483  *
6484  * This function is to be called when a tracer is about to be used.
6485  */
6486 int tracing_update_buffers(struct trace_array *tr)
6487 {
6488         int ret = 0;
6489
6490         mutex_lock(&trace_types_lock);
6491         if (!tr->ring_buffer_expanded)
6492                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6493                                                 RING_BUFFER_ALL_CPUS);
6494         mutex_unlock(&trace_types_lock);
6495
6496         return ret;
6497 }
6498
6499 struct trace_option_dentry;
6500
6501 static void
6502 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6503
6504 /*
6505  * Used to clear out the tracer before deletion of an instance.
6506  * Must have trace_types_lock held.
6507  */
6508 static void tracing_set_nop(struct trace_array *tr)
6509 {
6510         if (tr->current_trace == &nop_trace)
6511                 return;
6512         
6513         tr->current_trace->enabled--;
6514
6515         if (tr->current_trace->reset)
6516                 tr->current_trace->reset(tr);
6517
6518         tr->current_trace = &nop_trace;
6519 }
6520
6521 static bool tracer_options_updated;
6522
6523 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6524 {
6525         /* Only enable if the directory has been created already. */
6526         if (!tr->dir)
6527                 return;
6528
6529         /* Only create trace option files after update_tracer_options finish */
6530         if (!tracer_options_updated)
6531                 return;
6532
6533         create_trace_option_files(tr, t);
6534 }
6535
6536 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6537 {
6538         struct tracer *t;
6539 #ifdef CONFIG_TRACER_MAX_TRACE
6540         bool had_max_tr;
6541 #endif
6542         int ret = 0;
6543
6544         mutex_lock(&trace_types_lock);
6545
6546         if (!tr->ring_buffer_expanded) {
6547                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6548                                                 RING_BUFFER_ALL_CPUS);
6549                 if (ret < 0)
6550                         goto out;
6551                 ret = 0;
6552         }
6553
6554         for (t = trace_types; t; t = t->next) {
6555                 if (strcmp(t->name, buf) == 0)
6556                         break;
6557         }
6558         if (!t) {
6559                 ret = -EINVAL;
6560                 goto out;
6561         }
6562         if (t == tr->current_trace)
6563                 goto out;
6564
6565 #ifdef CONFIG_TRACER_SNAPSHOT
6566         if (t->use_max_tr) {
6567                 local_irq_disable();
6568                 arch_spin_lock(&tr->max_lock);
6569                 if (tr->cond_snapshot)
6570                         ret = -EBUSY;
6571                 arch_spin_unlock(&tr->max_lock);
6572                 local_irq_enable();
6573                 if (ret)
6574                         goto out;
6575         }
6576 #endif
6577         /* Some tracers won't work on kernel command line */
6578         if (system_state < SYSTEM_RUNNING && t->noboot) {
6579                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6580                         t->name);
6581                 goto out;
6582         }
6583
6584         /* Some tracers are only allowed for the top level buffer */
6585         if (!trace_ok_for_array(t, tr)) {
6586                 ret = -EINVAL;
6587                 goto out;
6588         }
6589
6590         /* If trace pipe files are being read, we can't change the tracer */
6591         if (tr->trace_ref) {
6592                 ret = -EBUSY;
6593                 goto out;
6594         }
6595
6596         trace_branch_disable();
6597
6598         tr->current_trace->enabled--;
6599
6600         if (tr->current_trace->reset)
6601                 tr->current_trace->reset(tr);
6602
6603 #ifdef CONFIG_TRACER_MAX_TRACE
6604         had_max_tr = tr->current_trace->use_max_tr;
6605
6606         /* Current trace needs to be nop_trace before synchronize_rcu */
6607         tr->current_trace = &nop_trace;
6608
6609         if (had_max_tr && !t->use_max_tr) {
6610                 /*
6611                  * We need to make sure that the update_max_tr sees that
6612                  * current_trace changed to nop_trace to keep it from
6613                  * swapping the buffers after we resize it.
6614                  * The update_max_tr is called from interrupts disabled
6615                  * so a synchronized_sched() is sufficient.
6616                  */
6617                 synchronize_rcu();
6618                 free_snapshot(tr);
6619         }
6620
6621         if (t->use_max_tr && !tr->allocated_snapshot) {
6622                 ret = tracing_alloc_snapshot_instance(tr);
6623                 if (ret < 0)
6624                         goto out;
6625         }
6626 #else
6627         tr->current_trace = &nop_trace;
6628 #endif
6629
6630         if (t->init) {
6631                 ret = tracer_init(t, tr);
6632                 if (ret)
6633                         goto out;
6634         }
6635
6636         tr->current_trace = t;
6637         tr->current_trace->enabled++;
6638         trace_branch_enable(tr);
6639  out:
6640         mutex_unlock(&trace_types_lock);
6641
6642         return ret;
6643 }
6644
6645 static ssize_t
6646 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6647                         size_t cnt, loff_t *ppos)
6648 {
6649         struct trace_array *tr = filp->private_data;
6650         char buf[MAX_TRACER_SIZE+1];
6651         char *name;
6652         size_t ret;
6653         int err;
6654
6655         ret = cnt;
6656
6657         if (cnt > MAX_TRACER_SIZE)
6658                 cnt = MAX_TRACER_SIZE;
6659
6660         if (copy_from_user(buf, ubuf, cnt))
6661                 return -EFAULT;
6662
6663         buf[cnt] = 0;
6664
6665         name = strim(buf);
6666
6667         err = tracing_set_tracer(tr, name);
6668         if (err)
6669                 return err;
6670
6671         *ppos += ret;
6672
6673         return ret;
6674 }
6675
6676 static ssize_t
6677 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6678                    size_t cnt, loff_t *ppos)
6679 {
6680         char buf[64];
6681         int r;
6682
6683         r = snprintf(buf, sizeof(buf), "%ld\n",
6684                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6685         if (r > sizeof(buf))
6686                 r = sizeof(buf);
6687         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6688 }
6689
6690 static ssize_t
6691 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6692                     size_t cnt, loff_t *ppos)
6693 {
6694         unsigned long val;
6695         int ret;
6696
6697         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6698         if (ret)
6699                 return ret;
6700
6701         *ptr = val * 1000;
6702
6703         return cnt;
6704 }
6705
6706 static ssize_t
6707 tracing_thresh_read(struct file *filp, char __user *ubuf,
6708                     size_t cnt, loff_t *ppos)
6709 {
6710         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6711 }
6712
6713 static ssize_t
6714 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6715                      size_t cnt, loff_t *ppos)
6716 {
6717         struct trace_array *tr = filp->private_data;
6718         int ret;
6719
6720         mutex_lock(&trace_types_lock);
6721         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6722         if (ret < 0)
6723                 goto out;
6724
6725         if (tr->current_trace->update_thresh) {
6726                 ret = tr->current_trace->update_thresh(tr);
6727                 if (ret < 0)
6728                         goto out;
6729         }
6730
6731         ret = cnt;
6732 out:
6733         mutex_unlock(&trace_types_lock);
6734
6735         return ret;
6736 }
6737
6738 #ifdef CONFIG_TRACER_MAX_TRACE
6739
6740 static ssize_t
6741 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6742                      size_t cnt, loff_t *ppos)
6743 {
6744         struct trace_array *tr = filp->private_data;
6745
6746         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6747 }
6748
6749 static ssize_t
6750 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6751                       size_t cnt, loff_t *ppos)
6752 {
6753         struct trace_array *tr = filp->private_data;
6754
6755         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6756 }
6757
6758 #endif
6759
6760 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6761 {
6762         if (cpu == RING_BUFFER_ALL_CPUS) {
6763                 if (cpumask_empty(tr->pipe_cpumask)) {
6764                         cpumask_setall(tr->pipe_cpumask);
6765                         return 0;
6766                 }
6767         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6768                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6769                 return 0;
6770         }
6771         return -EBUSY;
6772 }
6773
6774 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6775 {
6776         if (cpu == RING_BUFFER_ALL_CPUS) {
6777                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6778                 cpumask_clear(tr->pipe_cpumask);
6779         } else {
6780                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6781                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6782         }
6783 }
6784
6785 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6786 {
6787         struct trace_array *tr = inode->i_private;
6788         struct trace_iterator *iter;
6789         int cpu;
6790         int ret;
6791
6792         ret = tracing_check_open_get_tr(tr);
6793         if (ret)
6794                 return ret;
6795
6796         mutex_lock(&trace_types_lock);
6797         cpu = tracing_get_cpu(inode);
6798         ret = open_pipe_on_cpu(tr, cpu);
6799         if (ret)
6800                 goto fail_pipe_on_cpu;
6801
6802         /* create a buffer to store the information to pass to userspace */
6803         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6804         if (!iter) {
6805                 ret = -ENOMEM;
6806                 goto fail_alloc_iter;
6807         }
6808
6809         trace_seq_init(&iter->seq);
6810         iter->trace = tr->current_trace;
6811
6812         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6813                 ret = -ENOMEM;
6814                 goto fail;
6815         }
6816
6817         /* trace pipe does not show start of buffer */
6818         cpumask_setall(iter->started);
6819
6820         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6821                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6822
6823         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6824         if (trace_clocks[tr->clock_id].in_ns)
6825                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6826
6827         iter->tr = tr;
6828         iter->array_buffer = &tr->array_buffer;
6829         iter->cpu_file = cpu;
6830         mutex_init(&iter->mutex);
6831         filp->private_data = iter;
6832
6833         if (iter->trace->pipe_open)
6834                 iter->trace->pipe_open(iter);
6835
6836         nonseekable_open(inode, filp);
6837
6838         tr->trace_ref++;
6839
6840         mutex_unlock(&trace_types_lock);
6841         return ret;
6842
6843 fail:
6844         kfree(iter);
6845 fail_alloc_iter:
6846         close_pipe_on_cpu(tr, cpu);
6847 fail_pipe_on_cpu:
6848         __trace_array_put(tr);
6849         mutex_unlock(&trace_types_lock);
6850         return ret;
6851 }
6852
6853 static int tracing_release_pipe(struct inode *inode, struct file *file)
6854 {
6855         struct trace_iterator *iter = file->private_data;
6856         struct trace_array *tr = inode->i_private;
6857
6858         mutex_lock(&trace_types_lock);
6859
6860         tr->trace_ref--;
6861
6862         if (iter->trace->pipe_close)
6863                 iter->trace->pipe_close(iter);
6864         close_pipe_on_cpu(tr, iter->cpu_file);
6865         mutex_unlock(&trace_types_lock);
6866
6867         free_trace_iter_content(iter);
6868         kfree(iter);
6869
6870         trace_array_put(tr);
6871
6872         return 0;
6873 }
6874
6875 static __poll_t
6876 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6877 {
6878         struct trace_array *tr = iter->tr;
6879
6880         /* Iterators are static, they should be filled or empty */
6881         if (trace_buffer_iter(iter, iter->cpu_file))
6882                 return EPOLLIN | EPOLLRDNORM;
6883
6884         if (tr->trace_flags & TRACE_ITER_BLOCK)
6885                 /*
6886                  * Always select as readable when in blocking mode
6887                  */
6888                 return EPOLLIN | EPOLLRDNORM;
6889         else
6890                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6891                                              filp, poll_table, iter->tr->buffer_percent);
6892 }
6893
6894 static __poll_t
6895 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6896 {
6897         struct trace_iterator *iter = filp->private_data;
6898
6899         return trace_poll(iter, filp, poll_table);
6900 }
6901
6902 /* Must be called with iter->mutex held. */
6903 static int tracing_wait_pipe(struct file *filp)
6904 {
6905         struct trace_iterator *iter = filp->private_data;
6906         int ret;
6907
6908         while (trace_empty(iter)) {
6909
6910                 if ((filp->f_flags & O_NONBLOCK)) {
6911                         return -EAGAIN;
6912                 }
6913
6914                 /*
6915                  * We block until we read something and tracing is disabled.
6916                  * We still block if tracing is disabled, but we have never
6917                  * read anything. This allows a user to cat this file, and
6918                  * then enable tracing. But after we have read something,
6919                  * we give an EOF when tracing is again disabled.
6920                  *
6921                  * iter->pos will be 0 if we haven't read anything.
6922                  */
6923                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6924                         break;
6925
6926                 mutex_unlock(&iter->mutex);
6927
6928                 ret = wait_on_pipe(iter, 0);
6929
6930                 mutex_lock(&iter->mutex);
6931
6932                 if (ret)
6933                         return ret;
6934         }
6935
6936         return 1;
6937 }
6938
6939 /*
6940  * Consumer reader.
6941  */
6942 static ssize_t
6943 tracing_read_pipe(struct file *filp, char __user *ubuf,
6944                   size_t cnt, loff_t *ppos)
6945 {
6946         struct trace_iterator *iter = filp->private_data;
6947         ssize_t sret;
6948
6949         /*
6950          * Avoid more than one consumer on a single file descriptor
6951          * This is just a matter of traces coherency, the ring buffer itself
6952          * is protected.
6953          */
6954         mutex_lock(&iter->mutex);
6955
6956         /* return any leftover data */
6957         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6958         if (sret != -EBUSY)
6959                 goto out;
6960
6961         trace_seq_init(&iter->seq);
6962
6963         if (iter->trace->read) {
6964                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6965                 if (sret)
6966                         goto out;
6967         }
6968
6969 waitagain:
6970         sret = tracing_wait_pipe(filp);
6971         if (sret <= 0)
6972                 goto out;
6973
6974         /* stop when tracing is finished */
6975         if (trace_empty(iter)) {
6976                 sret = 0;
6977                 goto out;
6978         }
6979
6980         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6981                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6982
6983         /* reset all but tr, trace, and overruns */
6984         trace_iterator_reset(iter);
6985         cpumask_clear(iter->started);
6986         trace_seq_init(&iter->seq);
6987
6988         trace_event_read_lock();
6989         trace_access_lock(iter->cpu_file);
6990         while (trace_find_next_entry_inc(iter) != NULL) {
6991                 enum print_line_t ret;
6992                 int save_len = iter->seq.seq.len;
6993
6994                 ret = print_trace_line(iter);
6995                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6996                         /*
6997                          * If one print_trace_line() fills entire trace_seq in one shot,
6998                          * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6999                          * In this case, we need to consume it, otherwise, loop will peek
7000                          * this event next time, resulting in an infinite loop.
7001                          */
7002                         if (save_len == 0) {
7003                                 iter->seq.full = 0;
7004                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
7005                                 trace_consume(iter);
7006                                 break;
7007                         }
7008
7009                         /* In other cases, don't print partial lines */
7010                         iter->seq.seq.len = save_len;
7011                         break;
7012                 }
7013                 if (ret != TRACE_TYPE_NO_CONSUME)
7014                         trace_consume(iter);
7015
7016                 if (trace_seq_used(&iter->seq) >= cnt)
7017                         break;
7018
7019                 /*
7020                  * Setting the full flag means we reached the trace_seq buffer
7021                  * size and we should leave by partial output condition above.
7022                  * One of the trace_seq_* functions is not used properly.
7023                  */
7024                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7025                           iter->ent->type);
7026         }
7027         trace_access_unlock(iter->cpu_file);
7028         trace_event_read_unlock();
7029
7030         /* Now copy what we have to the user */
7031         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7032         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7033                 trace_seq_init(&iter->seq);
7034
7035         /*
7036          * If there was nothing to send to user, in spite of consuming trace
7037          * entries, go back to wait for more entries.
7038          */
7039         if (sret == -EBUSY)
7040                 goto waitagain;
7041
7042 out:
7043         mutex_unlock(&iter->mutex);
7044
7045         return sret;
7046 }
7047
7048 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7049                                      unsigned int idx)
7050 {
7051         __free_page(spd->pages[idx]);
7052 }
7053
7054 static size_t
7055 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7056 {
7057         size_t count;
7058         int save_len;
7059         int ret;
7060
7061         /* Seq buffer is page-sized, exactly what we need. */
7062         for (;;) {
7063                 save_len = iter->seq.seq.len;
7064                 ret = print_trace_line(iter);
7065
7066                 if (trace_seq_has_overflowed(&iter->seq)) {
7067                         iter->seq.seq.len = save_len;
7068                         break;
7069                 }
7070
7071                 /*
7072                  * This should not be hit, because it should only
7073                  * be set if the iter->seq overflowed. But check it
7074                  * anyway to be safe.
7075                  */
7076                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7077                         iter->seq.seq.len = save_len;
7078                         break;
7079                 }
7080
7081                 count = trace_seq_used(&iter->seq) - save_len;
7082                 if (rem < count) {
7083                         rem = 0;
7084                         iter->seq.seq.len = save_len;
7085                         break;
7086                 }
7087
7088                 if (ret != TRACE_TYPE_NO_CONSUME)
7089                         trace_consume(iter);
7090                 rem -= count;
7091                 if (!trace_find_next_entry_inc(iter))   {
7092                         rem = 0;
7093                         iter->ent = NULL;
7094                         break;
7095                 }
7096         }
7097
7098         return rem;
7099 }
7100
7101 static ssize_t tracing_splice_read_pipe(struct file *filp,
7102                                         loff_t *ppos,
7103                                         struct pipe_inode_info *pipe,
7104                                         size_t len,
7105                                         unsigned int flags)
7106 {
7107         struct page *pages_def[PIPE_DEF_BUFFERS];
7108         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7109         struct trace_iterator *iter = filp->private_data;
7110         struct splice_pipe_desc spd = {
7111                 .pages          = pages_def,
7112                 .partial        = partial_def,
7113                 .nr_pages       = 0, /* This gets updated below. */
7114                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7115                 .ops            = &default_pipe_buf_ops,
7116                 .spd_release    = tracing_spd_release_pipe,
7117         };
7118         ssize_t ret;
7119         size_t rem;
7120         unsigned int i;
7121
7122         if (splice_grow_spd(pipe, &spd))
7123                 return -ENOMEM;
7124
7125         mutex_lock(&iter->mutex);
7126
7127         if (iter->trace->splice_read) {
7128                 ret = iter->trace->splice_read(iter, filp,
7129                                                ppos, pipe, len, flags);
7130                 if (ret)
7131                         goto out_err;
7132         }
7133
7134         ret = tracing_wait_pipe(filp);
7135         if (ret <= 0)
7136                 goto out_err;
7137
7138         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7139                 ret = -EFAULT;
7140                 goto out_err;
7141         }
7142
7143         trace_event_read_lock();
7144         trace_access_lock(iter->cpu_file);
7145
7146         /* Fill as many pages as possible. */
7147         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7148                 spd.pages[i] = alloc_page(GFP_KERNEL);
7149                 if (!spd.pages[i])
7150                         break;
7151
7152                 rem = tracing_fill_pipe_page(rem, iter);
7153
7154                 /* Copy the data into the page, so we can start over. */
7155                 ret = trace_seq_to_buffer(&iter->seq,
7156                                           page_address(spd.pages[i]),
7157                                           trace_seq_used(&iter->seq));
7158                 if (ret < 0) {
7159                         __free_page(spd.pages[i]);
7160                         break;
7161                 }
7162                 spd.partial[i].offset = 0;
7163                 spd.partial[i].len = trace_seq_used(&iter->seq);
7164
7165                 trace_seq_init(&iter->seq);
7166         }
7167
7168         trace_access_unlock(iter->cpu_file);
7169         trace_event_read_unlock();
7170         mutex_unlock(&iter->mutex);
7171
7172         spd.nr_pages = i;
7173
7174         if (i)
7175                 ret = splice_to_pipe(pipe, &spd);
7176         else
7177                 ret = 0;
7178 out:
7179         splice_shrink_spd(&spd);
7180         return ret;
7181
7182 out_err:
7183         mutex_unlock(&iter->mutex);
7184         goto out;
7185 }
7186
7187 static ssize_t
7188 tracing_entries_read(struct file *filp, char __user *ubuf,
7189                      size_t cnt, loff_t *ppos)
7190 {
7191         struct inode *inode = file_inode(filp);
7192         struct trace_array *tr = inode->i_private;
7193         int cpu = tracing_get_cpu(inode);
7194         char buf[64];
7195         int r = 0;
7196         ssize_t ret;
7197
7198         mutex_lock(&trace_types_lock);
7199
7200         if (cpu == RING_BUFFER_ALL_CPUS) {
7201                 int cpu, buf_size_same;
7202                 unsigned long size;
7203
7204                 size = 0;
7205                 buf_size_same = 1;
7206                 /* check if all cpu sizes are same */
7207                 for_each_tracing_cpu(cpu) {
7208                         /* fill in the size from first enabled cpu */
7209                         if (size == 0)
7210                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7211                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7212                                 buf_size_same = 0;
7213                                 break;
7214                         }
7215                 }
7216
7217                 if (buf_size_same) {
7218                         if (!tr->ring_buffer_expanded)
7219                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7220                                             size >> 10,
7221                                             trace_buf_size >> 10);
7222                         else
7223                                 r = sprintf(buf, "%lu\n", size >> 10);
7224                 } else
7225                         r = sprintf(buf, "X\n");
7226         } else
7227                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7228
7229         mutex_unlock(&trace_types_lock);
7230
7231         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7232         return ret;
7233 }
7234
7235 static ssize_t
7236 tracing_entries_write(struct file *filp, const char __user *ubuf,
7237                       size_t cnt, loff_t *ppos)
7238 {
7239         struct inode *inode = file_inode(filp);
7240         struct trace_array *tr = inode->i_private;
7241         unsigned long val;
7242         int ret;
7243
7244         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7245         if (ret)
7246                 return ret;
7247
7248         /* must have at least 1 entry */
7249         if (!val)
7250                 return -EINVAL;
7251
7252         /* value is in KB */
7253         val <<= 10;
7254         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7255         if (ret < 0)
7256                 return ret;
7257
7258         *ppos += cnt;
7259
7260         return cnt;
7261 }
7262
7263 static ssize_t
7264 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7265                                 size_t cnt, loff_t *ppos)
7266 {
7267         struct trace_array *tr = filp->private_data;
7268         char buf[64];
7269         int r, cpu;
7270         unsigned long size = 0, expanded_size = 0;
7271
7272         mutex_lock(&trace_types_lock);
7273         for_each_tracing_cpu(cpu) {
7274                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7275                 if (!tr->ring_buffer_expanded)
7276                         expanded_size += trace_buf_size >> 10;
7277         }
7278         if (tr->ring_buffer_expanded)
7279                 r = sprintf(buf, "%lu\n", size);
7280         else
7281                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7282         mutex_unlock(&trace_types_lock);
7283
7284         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7285 }
7286
7287 static ssize_t
7288 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7289                           size_t cnt, loff_t *ppos)
7290 {
7291         /*
7292          * There is no need to read what the user has written, this function
7293          * is just to make sure that there is no error when "echo" is used
7294          */
7295
7296         *ppos += cnt;
7297
7298         return cnt;
7299 }
7300
7301 static int
7302 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7303 {
7304         struct trace_array *tr = inode->i_private;
7305
7306         /* disable tracing ? */
7307         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7308                 tracer_tracing_off(tr);
7309         /* resize the ring buffer to 0 */
7310         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7311
7312         trace_array_put(tr);
7313
7314         return 0;
7315 }
7316
7317 #define TRACE_MARKER_MAX_SIZE           4096
7318
7319 static ssize_t
7320 tracing_mark_write(struct file *filp, const char __user *ubuf,
7321                                         size_t cnt, loff_t *fpos)
7322 {
7323         struct trace_array *tr = filp->private_data;
7324         struct ring_buffer_event *event;
7325         enum event_trigger_type tt = ETT_NONE;
7326         struct trace_buffer *buffer;
7327         struct print_entry *entry;
7328         int meta_size;
7329         ssize_t written;
7330         size_t size;
7331         int len;
7332
7333 /* Used in tracing_mark_raw_write() as well */
7334 #define FAULTED_STR "<faulted>"
7335 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7336
7337         if (tracing_disabled)
7338                 return -EINVAL;
7339
7340         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7341                 return -EINVAL;
7342
7343         if ((ssize_t)cnt < 0)
7344                 return -EINVAL;
7345
7346         if (cnt > TRACE_MARKER_MAX_SIZE)
7347                 cnt = TRACE_MARKER_MAX_SIZE;
7348
7349         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7350  again:
7351         size = cnt + meta_size;
7352
7353         /* If less than "<faulted>", then make sure we can still add that */
7354         if (cnt < FAULTED_SIZE)
7355                 size += FAULTED_SIZE - cnt;
7356
7357         buffer = tr->array_buffer.buffer;
7358         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7359                                             tracing_gen_ctx());
7360         if (unlikely(!event)) {
7361                 /*
7362                  * If the size was greater than what was allowed, then
7363                  * make it smaller and try again.
7364                  */
7365                 if (size > ring_buffer_max_event_size(buffer)) {
7366                         /* cnt < FAULTED size should never be bigger than max */
7367                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7368                                 return -EBADF;
7369                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7370                         /* The above should only happen once */
7371                         if (WARN_ON_ONCE(cnt + meta_size == size))
7372                                 return -EBADF;
7373                         goto again;
7374                 }
7375
7376                 /* Ring buffer disabled, return as if not open for write */
7377                 return -EBADF;
7378         }
7379
7380         entry = ring_buffer_event_data(event);
7381         entry->ip = _THIS_IP_;
7382
7383         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7384         if (len) {
7385                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7386                 cnt = FAULTED_SIZE;
7387                 written = -EFAULT;
7388         } else
7389                 written = cnt;
7390
7391         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7392                 /* do not add \n before testing triggers, but add \0 */
7393                 entry->buf[cnt] = '\0';
7394                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7395         }
7396
7397         if (entry->buf[cnt - 1] != '\n') {
7398                 entry->buf[cnt] = '\n';
7399                 entry->buf[cnt + 1] = '\0';
7400         } else
7401                 entry->buf[cnt] = '\0';
7402
7403         if (static_branch_unlikely(&trace_marker_exports_enabled))
7404                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7405         __buffer_unlock_commit(buffer, event);
7406
7407         if (tt)
7408                 event_triggers_post_call(tr->trace_marker_file, tt);
7409
7410         return written;
7411 }
7412
7413 static ssize_t
7414 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7415                                         size_t cnt, loff_t *fpos)
7416 {
7417         struct trace_array *tr = filp->private_data;
7418         struct ring_buffer_event *event;
7419         struct trace_buffer *buffer;
7420         struct raw_data_entry *entry;
7421         ssize_t written;
7422         int size;
7423         int len;
7424
7425 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7426
7427         if (tracing_disabled)
7428                 return -EINVAL;
7429
7430         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7431                 return -EINVAL;
7432
7433         /* The marker must at least have a tag id */
7434         if (cnt < sizeof(unsigned int))
7435                 return -EINVAL;
7436
7437         size = sizeof(*entry) + cnt;
7438         if (cnt < FAULT_SIZE_ID)
7439                 size += FAULT_SIZE_ID - cnt;
7440
7441         buffer = tr->array_buffer.buffer;
7442
7443         if (size > ring_buffer_max_event_size(buffer))
7444                 return -EINVAL;
7445
7446         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7447                                             tracing_gen_ctx());
7448         if (!event)
7449                 /* Ring buffer disabled, return as if not open for write */
7450                 return -EBADF;
7451
7452         entry = ring_buffer_event_data(event);
7453
7454         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7455         if (len) {
7456                 entry->id = -1;
7457                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7458                 written = -EFAULT;
7459         } else
7460                 written = cnt;
7461
7462         __buffer_unlock_commit(buffer, event);
7463
7464         return written;
7465 }
7466
7467 static int tracing_clock_show(struct seq_file *m, void *v)
7468 {
7469         struct trace_array *tr = m->private;
7470         int i;
7471
7472         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7473                 seq_printf(m,
7474                         "%s%s%s%s", i ? " " : "",
7475                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7476                         i == tr->clock_id ? "]" : "");
7477         seq_putc(m, '\n');
7478
7479         return 0;
7480 }
7481
7482 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7483 {
7484         int i;
7485
7486         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7487                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7488                         break;
7489         }
7490         if (i == ARRAY_SIZE(trace_clocks))
7491                 return -EINVAL;
7492
7493         mutex_lock(&trace_types_lock);
7494
7495         tr->clock_id = i;
7496
7497         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7498
7499         /*
7500          * New clock may not be consistent with the previous clock.
7501          * Reset the buffer so that it doesn't have incomparable timestamps.
7502          */
7503         tracing_reset_online_cpus(&tr->array_buffer);
7504
7505 #ifdef CONFIG_TRACER_MAX_TRACE
7506         if (tr->max_buffer.buffer)
7507                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7508         tracing_reset_online_cpus(&tr->max_buffer);
7509 #endif
7510
7511         mutex_unlock(&trace_types_lock);
7512
7513         return 0;
7514 }
7515
7516 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7517                                    size_t cnt, loff_t *fpos)
7518 {
7519         struct seq_file *m = filp->private_data;
7520         struct trace_array *tr = m->private;
7521         char buf[64];
7522         const char *clockstr;
7523         int ret;
7524
7525         if (cnt >= sizeof(buf))
7526                 return -EINVAL;
7527
7528         if (copy_from_user(buf, ubuf, cnt))
7529                 return -EFAULT;
7530
7531         buf[cnt] = 0;
7532
7533         clockstr = strstrip(buf);
7534
7535         ret = tracing_set_clock(tr, clockstr);
7536         if (ret)
7537                 return ret;
7538
7539         *fpos += cnt;
7540
7541         return cnt;
7542 }
7543
7544 static int tracing_clock_open(struct inode *inode, struct file *file)
7545 {
7546         struct trace_array *tr = inode->i_private;
7547         int ret;
7548
7549         ret = tracing_check_open_get_tr(tr);
7550         if (ret)
7551                 return ret;
7552
7553         ret = single_open(file, tracing_clock_show, inode->i_private);
7554         if (ret < 0)
7555                 trace_array_put(tr);
7556
7557         return ret;
7558 }
7559
7560 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7561 {
7562         struct trace_array *tr = m->private;
7563
7564         mutex_lock(&trace_types_lock);
7565
7566         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7567                 seq_puts(m, "delta [absolute]\n");
7568         else
7569                 seq_puts(m, "[delta] absolute\n");
7570
7571         mutex_unlock(&trace_types_lock);
7572
7573         return 0;
7574 }
7575
7576 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7577 {
7578         struct trace_array *tr = inode->i_private;
7579         int ret;
7580
7581         ret = tracing_check_open_get_tr(tr);
7582         if (ret)
7583                 return ret;
7584
7585         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7586         if (ret < 0)
7587                 trace_array_put(tr);
7588
7589         return ret;
7590 }
7591
7592 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7593 {
7594         if (rbe == this_cpu_read(trace_buffered_event))
7595                 return ring_buffer_time_stamp(buffer);
7596
7597         return ring_buffer_event_time_stamp(buffer, rbe);
7598 }
7599
7600 /*
7601  * Set or disable using the per CPU trace_buffer_event when possible.
7602  */
7603 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7604 {
7605         int ret = 0;
7606
7607         mutex_lock(&trace_types_lock);
7608
7609         if (set && tr->no_filter_buffering_ref++)
7610                 goto out;
7611
7612         if (!set) {
7613                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7614                         ret = -EINVAL;
7615                         goto out;
7616                 }
7617
7618                 --tr->no_filter_buffering_ref;
7619         }
7620  out:
7621         mutex_unlock(&trace_types_lock);
7622
7623         return ret;
7624 }
7625
7626 struct ftrace_buffer_info {
7627         struct trace_iterator   iter;
7628         void                    *spare;
7629         unsigned int            spare_cpu;
7630         unsigned int            spare_size;
7631         unsigned int            read;
7632 };
7633
7634 #ifdef CONFIG_TRACER_SNAPSHOT
7635 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7636 {
7637         struct trace_array *tr = inode->i_private;
7638         struct trace_iterator *iter;
7639         struct seq_file *m;
7640         int ret;
7641
7642         ret = tracing_check_open_get_tr(tr);
7643         if (ret)
7644                 return ret;
7645
7646         if (file->f_mode & FMODE_READ) {
7647                 iter = __tracing_open(inode, file, true);
7648                 if (IS_ERR(iter))
7649                         ret = PTR_ERR(iter);
7650         } else {
7651                 /* Writes still need the seq_file to hold the private data */
7652                 ret = -ENOMEM;
7653                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7654                 if (!m)
7655                         goto out;
7656                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7657                 if (!iter) {
7658                         kfree(m);
7659                         goto out;
7660                 }
7661                 ret = 0;
7662
7663                 iter->tr = tr;
7664                 iter->array_buffer = &tr->max_buffer;
7665                 iter->cpu_file = tracing_get_cpu(inode);
7666                 m->private = iter;
7667                 file->private_data = m;
7668         }
7669 out:
7670         if (ret < 0)
7671                 trace_array_put(tr);
7672
7673         return ret;
7674 }
7675
7676 static void tracing_swap_cpu_buffer(void *tr)
7677 {
7678         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7679 }
7680
7681 static ssize_t
7682 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7683                        loff_t *ppos)
7684 {
7685         struct seq_file *m = filp->private_data;
7686         struct trace_iterator *iter = m->private;
7687         struct trace_array *tr = iter->tr;
7688         unsigned long val;
7689         int ret;
7690
7691         ret = tracing_update_buffers(tr);
7692         if (ret < 0)
7693                 return ret;
7694
7695         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7696         if (ret)
7697                 return ret;
7698
7699         mutex_lock(&trace_types_lock);
7700
7701         if (tr->current_trace->use_max_tr) {
7702                 ret = -EBUSY;
7703                 goto out;
7704         }
7705
7706         local_irq_disable();
7707         arch_spin_lock(&tr->max_lock);
7708         if (tr->cond_snapshot)
7709                 ret = -EBUSY;
7710         arch_spin_unlock(&tr->max_lock);
7711         local_irq_enable();
7712         if (ret)
7713                 goto out;
7714
7715         switch (val) {
7716         case 0:
7717                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7718                         ret = -EINVAL;
7719                         break;
7720                 }
7721                 if (tr->allocated_snapshot)
7722                         free_snapshot(tr);
7723                 break;
7724         case 1:
7725 /* Only allow per-cpu swap if the ring buffer supports it */
7726 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7727                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7728                         ret = -EINVAL;
7729                         break;
7730                 }
7731 #endif
7732                 if (tr->allocated_snapshot)
7733                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7734                                         &tr->array_buffer, iter->cpu_file);
7735                 else
7736                         ret = tracing_alloc_snapshot_instance(tr);
7737                 if (ret < 0)
7738                         break;
7739                 /* Now, we're going to swap */
7740                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7741                         local_irq_disable();
7742                         update_max_tr(tr, current, smp_processor_id(), NULL);
7743                         local_irq_enable();
7744                 } else {
7745                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7746                                                  (void *)tr, 1);
7747                 }
7748                 break;
7749         default:
7750                 if (tr->allocated_snapshot) {
7751                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7752                                 tracing_reset_online_cpus(&tr->max_buffer);
7753                         else
7754                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7755                 }
7756                 break;
7757         }
7758
7759         if (ret >= 0) {
7760                 *ppos += cnt;
7761                 ret = cnt;
7762         }
7763 out:
7764         mutex_unlock(&trace_types_lock);
7765         return ret;
7766 }
7767
7768 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7769 {
7770         struct seq_file *m = file->private_data;
7771         int ret;
7772
7773         ret = tracing_release(inode, file);
7774
7775         if (file->f_mode & FMODE_READ)
7776                 return ret;
7777
7778         /* If write only, the seq_file is just a stub */
7779         if (m)
7780                 kfree(m->private);
7781         kfree(m);
7782
7783         return 0;
7784 }
7785
7786 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7787 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7788                                     size_t count, loff_t *ppos);
7789 static int tracing_buffers_release(struct inode *inode, struct file *file);
7790 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7791                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7792
7793 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7794 {
7795         struct ftrace_buffer_info *info;
7796         int ret;
7797
7798         /* The following checks for tracefs lockdown */
7799         ret = tracing_buffers_open(inode, filp);
7800         if (ret < 0)
7801                 return ret;
7802
7803         info = filp->private_data;
7804
7805         if (info->iter.trace->use_max_tr) {
7806                 tracing_buffers_release(inode, filp);
7807                 return -EBUSY;
7808         }
7809
7810         info->iter.snapshot = true;
7811         info->iter.array_buffer = &info->iter.tr->max_buffer;
7812
7813         return ret;
7814 }
7815
7816 #endif /* CONFIG_TRACER_SNAPSHOT */
7817
7818
7819 static const struct file_operations tracing_thresh_fops = {
7820         .open           = tracing_open_generic,
7821         .read           = tracing_thresh_read,
7822         .write          = tracing_thresh_write,
7823         .llseek         = generic_file_llseek,
7824 };
7825
7826 #ifdef CONFIG_TRACER_MAX_TRACE
7827 static const struct file_operations tracing_max_lat_fops = {
7828         .open           = tracing_open_generic_tr,
7829         .read           = tracing_max_lat_read,
7830         .write          = tracing_max_lat_write,
7831         .llseek         = generic_file_llseek,
7832         .release        = tracing_release_generic_tr,
7833 };
7834 #endif
7835
7836 static const struct file_operations set_tracer_fops = {
7837         .open           = tracing_open_generic_tr,
7838         .read           = tracing_set_trace_read,
7839         .write          = tracing_set_trace_write,
7840         .llseek         = generic_file_llseek,
7841         .release        = tracing_release_generic_tr,
7842 };
7843
7844 static const struct file_operations tracing_pipe_fops = {
7845         .open           = tracing_open_pipe,
7846         .poll           = tracing_poll_pipe,
7847         .read           = tracing_read_pipe,
7848         .splice_read    = tracing_splice_read_pipe,
7849         .release        = tracing_release_pipe,
7850         .llseek         = no_llseek,
7851 };
7852
7853 static const struct file_operations tracing_entries_fops = {
7854         .open           = tracing_open_generic_tr,
7855         .read           = tracing_entries_read,
7856         .write          = tracing_entries_write,
7857         .llseek         = generic_file_llseek,
7858         .release        = tracing_release_generic_tr,
7859 };
7860
7861 static const struct file_operations tracing_total_entries_fops = {
7862         .open           = tracing_open_generic_tr,
7863         .read           = tracing_total_entries_read,
7864         .llseek         = generic_file_llseek,
7865         .release        = tracing_release_generic_tr,
7866 };
7867
7868 static const struct file_operations tracing_free_buffer_fops = {
7869         .open           = tracing_open_generic_tr,
7870         .write          = tracing_free_buffer_write,
7871         .release        = tracing_free_buffer_release,
7872 };
7873
7874 static const struct file_operations tracing_mark_fops = {
7875         .open           = tracing_mark_open,
7876         .write          = tracing_mark_write,
7877         .release        = tracing_release_generic_tr,
7878 };
7879
7880 static const struct file_operations tracing_mark_raw_fops = {
7881         .open           = tracing_mark_open,
7882         .write          = tracing_mark_raw_write,
7883         .release        = tracing_release_generic_tr,
7884 };
7885
7886 static const struct file_operations trace_clock_fops = {
7887         .open           = tracing_clock_open,
7888         .read           = seq_read,
7889         .llseek         = seq_lseek,
7890         .release        = tracing_single_release_tr,
7891         .write          = tracing_clock_write,
7892 };
7893
7894 static const struct file_operations trace_time_stamp_mode_fops = {
7895         .open           = tracing_time_stamp_mode_open,
7896         .read           = seq_read,
7897         .llseek         = seq_lseek,
7898         .release        = tracing_single_release_tr,
7899 };
7900
7901 #ifdef CONFIG_TRACER_SNAPSHOT
7902 static const struct file_operations snapshot_fops = {
7903         .open           = tracing_snapshot_open,
7904         .read           = seq_read,
7905         .write          = tracing_snapshot_write,
7906         .llseek         = tracing_lseek,
7907         .release        = tracing_snapshot_release,
7908 };
7909
7910 static const struct file_operations snapshot_raw_fops = {
7911         .open           = snapshot_raw_open,
7912         .read           = tracing_buffers_read,
7913         .release        = tracing_buffers_release,
7914         .splice_read    = tracing_buffers_splice_read,
7915         .llseek         = no_llseek,
7916 };
7917
7918 #endif /* CONFIG_TRACER_SNAPSHOT */
7919
7920 /*
7921  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7922  * @filp: The active open file structure
7923  * @ubuf: The userspace provided buffer to read value into
7924  * @cnt: The maximum number of bytes to read
7925  * @ppos: The current "file" position
7926  *
7927  * This function implements the write interface for a struct trace_min_max_param.
7928  * The filp->private_data must point to a trace_min_max_param structure that
7929  * defines where to write the value, the min and the max acceptable values,
7930  * and a lock to protect the write.
7931  */
7932 static ssize_t
7933 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7934 {
7935         struct trace_min_max_param *param = filp->private_data;
7936         u64 val;
7937         int err;
7938
7939         if (!param)
7940                 return -EFAULT;
7941
7942         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7943         if (err)
7944                 return err;
7945
7946         if (param->lock)
7947                 mutex_lock(param->lock);
7948
7949         if (param->min && val < *param->min)
7950                 err = -EINVAL;
7951
7952         if (param->max && val > *param->max)
7953                 err = -EINVAL;
7954
7955         if (!err)
7956                 *param->val = val;
7957
7958         if (param->lock)
7959                 mutex_unlock(param->lock);
7960
7961         if (err)
7962                 return err;
7963
7964         return cnt;
7965 }
7966
7967 /*
7968  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7969  * @filp: The active open file structure
7970  * @ubuf: The userspace provided buffer to read value into
7971  * @cnt: The maximum number of bytes to read
7972  * @ppos: The current "file" position
7973  *
7974  * This function implements the read interface for a struct trace_min_max_param.
7975  * The filp->private_data must point to a trace_min_max_param struct with valid
7976  * data.
7977  */
7978 static ssize_t
7979 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7980 {
7981         struct trace_min_max_param *param = filp->private_data;
7982         char buf[U64_STR_SIZE];
7983         int len;
7984         u64 val;
7985
7986         if (!param)
7987                 return -EFAULT;
7988
7989         val = *param->val;
7990
7991         if (cnt > sizeof(buf))
7992                 cnt = sizeof(buf);
7993
7994         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7995
7996         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7997 }
7998
7999 const struct file_operations trace_min_max_fops = {
8000         .open           = tracing_open_generic,
8001         .read           = trace_min_max_read,
8002         .write          = trace_min_max_write,
8003 };
8004
8005 #define TRACING_LOG_ERRS_MAX    8
8006 #define TRACING_LOG_LOC_MAX     128
8007
8008 #define CMD_PREFIX "  Command: "
8009
8010 struct err_info {
8011         const char      **errs; /* ptr to loc-specific array of err strings */
8012         u8              type;   /* index into errs -> specific err string */
8013         u16             pos;    /* caret position */
8014         u64             ts;
8015 };
8016
8017 struct tracing_log_err {
8018         struct list_head        list;
8019         struct err_info         info;
8020         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
8021         char                    *cmd;                     /* what caused err */
8022 };
8023
8024 static DEFINE_MUTEX(tracing_err_log_lock);
8025
8026 static struct tracing_log_err *alloc_tracing_log_err(int len)
8027 {
8028         struct tracing_log_err *err;
8029
8030         err = kzalloc(sizeof(*err), GFP_KERNEL);
8031         if (!err)
8032                 return ERR_PTR(-ENOMEM);
8033
8034         err->cmd = kzalloc(len, GFP_KERNEL);
8035         if (!err->cmd) {
8036                 kfree(err);
8037                 return ERR_PTR(-ENOMEM);
8038         }
8039
8040         return err;
8041 }
8042
8043 static void free_tracing_log_err(struct tracing_log_err *err)
8044 {
8045         kfree(err->cmd);
8046         kfree(err);
8047 }
8048
8049 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8050                                                    int len)
8051 {
8052         struct tracing_log_err *err;
8053         char *cmd;
8054
8055         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8056                 err = alloc_tracing_log_err(len);
8057                 if (PTR_ERR(err) != -ENOMEM)
8058                         tr->n_err_log_entries++;
8059
8060                 return err;
8061         }
8062         cmd = kzalloc(len, GFP_KERNEL);
8063         if (!cmd)
8064                 return ERR_PTR(-ENOMEM);
8065         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8066         kfree(err->cmd);
8067         err->cmd = cmd;
8068         list_del(&err->list);
8069
8070         return err;
8071 }
8072
8073 /**
8074  * err_pos - find the position of a string within a command for error careting
8075  * @cmd: The tracing command that caused the error
8076  * @str: The string to position the caret at within @cmd
8077  *
8078  * Finds the position of the first occurrence of @str within @cmd.  The
8079  * return value can be passed to tracing_log_err() for caret placement
8080  * within @cmd.
8081  *
8082  * Returns the index within @cmd of the first occurrence of @str or 0
8083  * if @str was not found.
8084  */
8085 unsigned int err_pos(char *cmd, const char *str)
8086 {
8087         char *found;
8088
8089         if (WARN_ON(!strlen(cmd)))
8090                 return 0;
8091
8092         found = strstr(cmd, str);
8093         if (found)
8094                 return found - cmd;
8095
8096         return 0;
8097 }
8098
8099 /**
8100  * tracing_log_err - write an error to the tracing error log
8101  * @tr: The associated trace array for the error (NULL for top level array)
8102  * @loc: A string describing where the error occurred
8103  * @cmd: The tracing command that caused the error
8104  * @errs: The array of loc-specific static error strings
8105  * @type: The index into errs[], which produces the specific static err string
8106  * @pos: The position the caret should be placed in the cmd
8107  *
8108  * Writes an error into tracing/error_log of the form:
8109  *
8110  * <loc>: error: <text>
8111  *   Command: <cmd>
8112  *              ^
8113  *
8114  * tracing/error_log is a small log file containing the last
8115  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8116  * unless there has been a tracing error, and the error log can be
8117  * cleared and have its memory freed by writing the empty string in
8118  * truncation mode to it i.e. echo > tracing/error_log.
8119  *
8120  * NOTE: the @errs array along with the @type param are used to
8121  * produce a static error string - this string is not copied and saved
8122  * when the error is logged - only a pointer to it is saved.  See
8123  * existing callers for examples of how static strings are typically
8124  * defined for use with tracing_log_err().
8125  */
8126 void tracing_log_err(struct trace_array *tr,
8127                      const char *loc, const char *cmd,
8128                      const char **errs, u8 type, u16 pos)
8129 {
8130         struct tracing_log_err *err;
8131         int len = 0;
8132
8133         if (!tr)
8134                 tr = &global_trace;
8135
8136         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8137
8138         mutex_lock(&tracing_err_log_lock);
8139         err = get_tracing_log_err(tr, len);
8140         if (PTR_ERR(err) == -ENOMEM) {
8141                 mutex_unlock(&tracing_err_log_lock);
8142                 return;
8143         }
8144
8145         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8146         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8147
8148         err->info.errs = errs;
8149         err->info.type = type;
8150         err->info.pos = pos;
8151         err->info.ts = local_clock();
8152
8153         list_add_tail(&err->list, &tr->err_log);
8154         mutex_unlock(&tracing_err_log_lock);
8155 }
8156
8157 static void clear_tracing_err_log(struct trace_array *tr)
8158 {
8159         struct tracing_log_err *err, *next;
8160
8161         mutex_lock(&tracing_err_log_lock);
8162         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8163                 list_del(&err->list);
8164                 free_tracing_log_err(err);
8165         }
8166
8167         tr->n_err_log_entries = 0;
8168         mutex_unlock(&tracing_err_log_lock);
8169 }
8170
8171 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8172 {
8173         struct trace_array *tr = m->private;
8174
8175         mutex_lock(&tracing_err_log_lock);
8176
8177         return seq_list_start(&tr->err_log, *pos);
8178 }
8179
8180 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8181 {
8182         struct trace_array *tr = m->private;
8183
8184         return seq_list_next(v, &tr->err_log, pos);
8185 }
8186
8187 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8188 {
8189         mutex_unlock(&tracing_err_log_lock);
8190 }
8191
8192 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8193 {
8194         u16 i;
8195
8196         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8197                 seq_putc(m, ' ');
8198         for (i = 0; i < pos; i++)
8199                 seq_putc(m, ' ');
8200         seq_puts(m, "^\n");
8201 }
8202
8203 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8204 {
8205         struct tracing_log_err *err = v;
8206
8207         if (err) {
8208                 const char *err_text = err->info.errs[err->info.type];
8209                 u64 sec = err->info.ts;
8210                 u32 nsec;
8211
8212                 nsec = do_div(sec, NSEC_PER_SEC);
8213                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8214                            err->loc, err_text);
8215                 seq_printf(m, "%s", err->cmd);
8216                 tracing_err_log_show_pos(m, err->info.pos);
8217         }
8218
8219         return 0;
8220 }
8221
8222 static const struct seq_operations tracing_err_log_seq_ops = {
8223         .start  = tracing_err_log_seq_start,
8224         .next   = tracing_err_log_seq_next,
8225         .stop   = tracing_err_log_seq_stop,
8226         .show   = tracing_err_log_seq_show
8227 };
8228
8229 static int tracing_err_log_open(struct inode *inode, struct file *file)
8230 {
8231         struct trace_array *tr = inode->i_private;
8232         int ret = 0;
8233
8234         ret = tracing_check_open_get_tr(tr);
8235         if (ret)
8236                 return ret;
8237
8238         /* If this file was opened for write, then erase contents */
8239         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8240                 clear_tracing_err_log(tr);
8241
8242         if (file->f_mode & FMODE_READ) {
8243                 ret = seq_open(file, &tracing_err_log_seq_ops);
8244                 if (!ret) {
8245                         struct seq_file *m = file->private_data;
8246                         m->private = tr;
8247                 } else {
8248                         trace_array_put(tr);
8249                 }
8250         }
8251         return ret;
8252 }
8253
8254 static ssize_t tracing_err_log_write(struct file *file,
8255                                      const char __user *buffer,
8256                                      size_t count, loff_t *ppos)
8257 {
8258         return count;
8259 }
8260
8261 static int tracing_err_log_release(struct inode *inode, struct file *file)
8262 {
8263         struct trace_array *tr = inode->i_private;
8264
8265         trace_array_put(tr);
8266
8267         if (file->f_mode & FMODE_READ)
8268                 seq_release(inode, file);
8269
8270         return 0;
8271 }
8272
8273 static const struct file_operations tracing_err_log_fops = {
8274         .open           = tracing_err_log_open,
8275         .write          = tracing_err_log_write,
8276         .read           = seq_read,
8277         .llseek         = tracing_lseek,
8278         .release        = tracing_err_log_release,
8279 };
8280
8281 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8282 {
8283         struct trace_array *tr = inode->i_private;
8284         struct ftrace_buffer_info *info;
8285         int ret;
8286
8287         ret = tracing_check_open_get_tr(tr);
8288         if (ret)
8289                 return ret;
8290
8291         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8292         if (!info) {
8293                 trace_array_put(tr);
8294                 return -ENOMEM;
8295         }
8296
8297         mutex_lock(&trace_types_lock);
8298
8299         info->iter.tr           = tr;
8300         info->iter.cpu_file     = tracing_get_cpu(inode);
8301         info->iter.trace        = tr->current_trace;
8302         info->iter.array_buffer = &tr->array_buffer;
8303         info->spare             = NULL;
8304         /* Force reading ring buffer for first read */
8305         info->read              = (unsigned int)-1;
8306
8307         filp->private_data = info;
8308
8309         tr->trace_ref++;
8310
8311         mutex_unlock(&trace_types_lock);
8312
8313         ret = nonseekable_open(inode, filp);
8314         if (ret < 0)
8315                 trace_array_put(tr);
8316
8317         return ret;
8318 }
8319
8320 static __poll_t
8321 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8322 {
8323         struct ftrace_buffer_info *info = filp->private_data;
8324         struct trace_iterator *iter = &info->iter;
8325
8326         return trace_poll(iter, filp, poll_table);
8327 }
8328
8329 static ssize_t
8330 tracing_buffers_read(struct file *filp, char __user *ubuf,
8331                      size_t count, loff_t *ppos)
8332 {
8333         struct ftrace_buffer_info *info = filp->private_data;
8334         struct trace_iterator *iter = &info->iter;
8335         void *trace_data;
8336         int page_size;
8337         ssize_t ret = 0;
8338         ssize_t size;
8339
8340         if (!count)
8341                 return 0;
8342
8343 #ifdef CONFIG_TRACER_MAX_TRACE
8344         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8345                 return -EBUSY;
8346 #endif
8347
8348         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8349
8350         /* Make sure the spare matches the current sub buffer size */
8351         if (info->spare) {
8352                 if (page_size != info->spare_size) {
8353                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8354                                                    info->spare_cpu, info->spare);
8355                         info->spare = NULL;
8356                 }
8357         }
8358
8359         if (!info->spare) {
8360                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8361                                                           iter->cpu_file);
8362                 if (IS_ERR(info->spare)) {
8363                         ret = PTR_ERR(info->spare);
8364                         info->spare = NULL;
8365                 } else {
8366                         info->spare_cpu = iter->cpu_file;
8367                         info->spare_size = page_size;
8368                 }
8369         }
8370         if (!info->spare)
8371                 return ret;
8372
8373         /* Do we have previous read data to read? */
8374         if (info->read < page_size)
8375                 goto read;
8376
8377  again:
8378         trace_access_lock(iter->cpu_file);
8379         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8380                                     info->spare,
8381                                     count,
8382                                     iter->cpu_file, 0);
8383         trace_access_unlock(iter->cpu_file);
8384
8385         if (ret < 0) {
8386                 if (trace_empty(iter)) {
8387                         if ((filp->f_flags & O_NONBLOCK))
8388                                 return -EAGAIN;
8389
8390                         ret = wait_on_pipe(iter, 0);
8391                         if (ret)
8392                                 return ret;
8393
8394                         goto again;
8395                 }
8396                 return 0;
8397         }
8398
8399         info->read = 0;
8400  read:
8401         size = page_size - info->read;
8402         if (size > count)
8403                 size = count;
8404         trace_data = ring_buffer_read_page_data(info->spare);
8405         ret = copy_to_user(ubuf, trace_data + info->read, size);
8406         if (ret == size)
8407                 return -EFAULT;
8408
8409         size -= ret;
8410
8411         *ppos += size;
8412         info->read += size;
8413
8414         return size;
8415 }
8416
8417 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8418 {
8419         struct ftrace_buffer_info *info = file->private_data;
8420         struct trace_iterator *iter = &info->iter;
8421
8422         iter->closed = true;
8423         /* Make sure the waiters see the new wait_index */
8424         (void)atomic_fetch_inc_release(&iter->wait_index);
8425
8426         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8427
8428         return 0;
8429 }
8430
8431 static int tracing_buffers_release(struct inode *inode, struct file *file)
8432 {
8433         struct ftrace_buffer_info *info = file->private_data;
8434         struct trace_iterator *iter = &info->iter;
8435
8436         mutex_lock(&trace_types_lock);
8437
8438         iter->tr->trace_ref--;
8439
8440         __trace_array_put(iter->tr);
8441
8442         if (info->spare)
8443                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8444                                            info->spare_cpu, info->spare);
8445         kvfree(info);
8446
8447         mutex_unlock(&trace_types_lock);
8448
8449         return 0;
8450 }
8451
8452 struct buffer_ref {
8453         struct trace_buffer     *buffer;
8454         void                    *page;
8455         int                     cpu;
8456         refcount_t              refcount;
8457 };
8458
8459 static void buffer_ref_release(struct buffer_ref *ref)
8460 {
8461         if (!refcount_dec_and_test(&ref->refcount))
8462                 return;
8463         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8464         kfree(ref);
8465 }
8466
8467 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8468                                     struct pipe_buffer *buf)
8469 {
8470         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8471
8472         buffer_ref_release(ref);
8473         buf->private = 0;
8474 }
8475
8476 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8477                                 struct pipe_buffer *buf)
8478 {
8479         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8480
8481         if (refcount_read(&ref->refcount) > INT_MAX/2)
8482                 return false;
8483
8484         refcount_inc(&ref->refcount);
8485         return true;
8486 }
8487
8488 /* Pipe buffer operations for a buffer. */
8489 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8490         .release                = buffer_pipe_buf_release,
8491         .get                    = buffer_pipe_buf_get,
8492 };
8493
8494 /*
8495  * Callback from splice_to_pipe(), if we need to release some pages
8496  * at the end of the spd in case we error'ed out in filling the pipe.
8497  */
8498 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8499 {
8500         struct buffer_ref *ref =
8501                 (struct buffer_ref *)spd->partial[i].private;
8502
8503         buffer_ref_release(ref);
8504         spd->partial[i].private = 0;
8505 }
8506
8507 static ssize_t
8508 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8509                             struct pipe_inode_info *pipe, size_t len,
8510                             unsigned int flags)
8511 {
8512         struct ftrace_buffer_info *info = file->private_data;
8513         struct trace_iterator *iter = &info->iter;
8514         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8515         struct page *pages_def[PIPE_DEF_BUFFERS];
8516         struct splice_pipe_desc spd = {
8517                 .pages          = pages_def,
8518                 .partial        = partial_def,
8519                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8520                 .ops            = &buffer_pipe_buf_ops,
8521                 .spd_release    = buffer_spd_release,
8522         };
8523         struct buffer_ref *ref;
8524         bool woken = false;
8525         int page_size;
8526         int entries, i;
8527         ssize_t ret = 0;
8528
8529 #ifdef CONFIG_TRACER_MAX_TRACE
8530         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8531                 return -EBUSY;
8532 #endif
8533
8534         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8535         if (*ppos & (page_size - 1))
8536                 return -EINVAL;
8537
8538         if (len & (page_size - 1)) {
8539                 if (len < page_size)
8540                         return -EINVAL;
8541                 len &= (~(page_size - 1));
8542         }
8543
8544         if (splice_grow_spd(pipe, &spd))
8545                 return -ENOMEM;
8546
8547  again:
8548         trace_access_lock(iter->cpu_file);
8549         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8550
8551         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8552                 struct page *page;
8553                 int r;
8554
8555                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8556                 if (!ref) {
8557                         ret = -ENOMEM;
8558                         break;
8559                 }
8560
8561                 refcount_set(&ref->refcount, 1);
8562                 ref->buffer = iter->array_buffer->buffer;
8563                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8564                 if (IS_ERR(ref->page)) {
8565                         ret = PTR_ERR(ref->page);
8566                         ref->page = NULL;
8567                         kfree(ref);
8568                         break;
8569                 }
8570                 ref->cpu = iter->cpu_file;
8571
8572                 r = ring_buffer_read_page(ref->buffer, ref->page,
8573                                           len, iter->cpu_file, 1);
8574                 if (r < 0) {
8575                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8576                                                    ref->page);
8577                         kfree(ref);
8578                         break;
8579                 }
8580
8581                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8582
8583                 spd.pages[i] = page;
8584                 spd.partial[i].len = page_size;
8585                 spd.partial[i].offset = 0;
8586                 spd.partial[i].private = (unsigned long)ref;
8587                 spd.nr_pages++;
8588                 *ppos += page_size;
8589
8590                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8591         }
8592
8593         trace_access_unlock(iter->cpu_file);
8594         spd.nr_pages = i;
8595
8596         /* did we read anything? */
8597         if (!spd.nr_pages) {
8598
8599                 if (ret)
8600                         goto out;
8601
8602                 if (woken)
8603                         goto out;
8604
8605                 ret = -EAGAIN;
8606                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8607                         goto out;
8608
8609                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8610                 if (ret)
8611                         goto out;
8612
8613                 /* No need to wait after waking up when tracing is off */
8614                 if (!tracer_tracing_is_on(iter->tr))
8615                         goto out;
8616
8617                 /* Iterate one more time to collect any new data then exit */
8618                 woken = true;
8619
8620                 goto again;
8621         }
8622
8623         ret = splice_to_pipe(pipe, &spd);
8624 out:
8625         splice_shrink_spd(&spd);
8626
8627         return ret;
8628 }
8629
8630 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8631 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8632 {
8633         struct ftrace_buffer_info *info = file->private_data;
8634         struct trace_iterator *iter = &info->iter;
8635
8636         if (cmd)
8637                 return -ENOIOCTLCMD;
8638
8639         mutex_lock(&trace_types_lock);
8640
8641         /* Make sure the waiters see the new wait_index */
8642         (void)atomic_fetch_inc_release(&iter->wait_index);
8643
8644         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8645
8646         mutex_unlock(&trace_types_lock);
8647         return 0;
8648 }
8649
8650 static const struct file_operations tracing_buffers_fops = {
8651         .open           = tracing_buffers_open,
8652         .read           = tracing_buffers_read,
8653         .poll           = tracing_buffers_poll,
8654         .release        = tracing_buffers_release,
8655         .flush          = tracing_buffers_flush,
8656         .splice_read    = tracing_buffers_splice_read,
8657         .unlocked_ioctl = tracing_buffers_ioctl,
8658         .llseek         = no_llseek,
8659 };
8660
8661 static ssize_t
8662 tracing_stats_read(struct file *filp, char __user *ubuf,
8663                    size_t count, loff_t *ppos)
8664 {
8665         struct inode *inode = file_inode(filp);
8666         struct trace_array *tr = inode->i_private;
8667         struct array_buffer *trace_buf = &tr->array_buffer;
8668         int cpu = tracing_get_cpu(inode);
8669         struct trace_seq *s;
8670         unsigned long cnt;
8671         unsigned long long t;
8672         unsigned long usec_rem;
8673
8674         s = kmalloc(sizeof(*s), GFP_KERNEL);
8675         if (!s)
8676                 return -ENOMEM;
8677
8678         trace_seq_init(s);
8679
8680         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8681         trace_seq_printf(s, "entries: %ld\n", cnt);
8682
8683         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8684         trace_seq_printf(s, "overrun: %ld\n", cnt);
8685
8686         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8687         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8688
8689         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8690         trace_seq_printf(s, "bytes: %ld\n", cnt);
8691
8692         if (trace_clocks[tr->clock_id].in_ns) {
8693                 /* local or global for trace_clock */
8694                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8695                 usec_rem = do_div(t, USEC_PER_SEC);
8696                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8697                                                                 t, usec_rem);
8698
8699                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8700                 usec_rem = do_div(t, USEC_PER_SEC);
8701                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8702         } else {
8703                 /* counter or tsc mode for trace_clock */
8704                 trace_seq_printf(s, "oldest event ts: %llu\n",
8705                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8706
8707                 trace_seq_printf(s, "now ts: %llu\n",
8708                                 ring_buffer_time_stamp(trace_buf->buffer));
8709         }
8710
8711         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8712         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8713
8714         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8715         trace_seq_printf(s, "read events: %ld\n", cnt);
8716
8717         count = simple_read_from_buffer(ubuf, count, ppos,
8718                                         s->buffer, trace_seq_used(s));
8719
8720         kfree(s);
8721
8722         return count;
8723 }
8724
8725 static const struct file_operations tracing_stats_fops = {
8726         .open           = tracing_open_generic_tr,
8727         .read           = tracing_stats_read,
8728         .llseek         = generic_file_llseek,
8729         .release        = tracing_release_generic_tr,
8730 };
8731
8732 #ifdef CONFIG_DYNAMIC_FTRACE
8733
8734 static ssize_t
8735 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8736                   size_t cnt, loff_t *ppos)
8737 {
8738         ssize_t ret;
8739         char *buf;
8740         int r;
8741
8742         /* 256 should be plenty to hold the amount needed */
8743         buf = kmalloc(256, GFP_KERNEL);
8744         if (!buf)
8745                 return -ENOMEM;
8746
8747         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8748                       ftrace_update_tot_cnt,
8749                       ftrace_number_of_pages,
8750                       ftrace_number_of_groups);
8751
8752         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8753         kfree(buf);
8754         return ret;
8755 }
8756
8757 static const struct file_operations tracing_dyn_info_fops = {
8758         .open           = tracing_open_generic,
8759         .read           = tracing_read_dyn_info,
8760         .llseek         = generic_file_llseek,
8761 };
8762 #endif /* CONFIG_DYNAMIC_FTRACE */
8763
8764 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8765 static void
8766 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8767                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8768                 void *data)
8769 {
8770         tracing_snapshot_instance(tr);
8771 }
8772
8773 static void
8774 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8775                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8776                       void *data)
8777 {
8778         struct ftrace_func_mapper *mapper = data;
8779         long *count = NULL;
8780
8781         if (mapper)
8782                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8783
8784         if (count) {
8785
8786                 if (*count <= 0)
8787                         return;
8788
8789                 (*count)--;
8790         }
8791
8792         tracing_snapshot_instance(tr);
8793 }
8794
8795 static int
8796 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8797                       struct ftrace_probe_ops *ops, void *data)
8798 {
8799         struct ftrace_func_mapper *mapper = data;
8800         long *count = NULL;
8801
8802         seq_printf(m, "%ps:", (void *)ip);
8803
8804         seq_puts(m, "snapshot");
8805
8806         if (mapper)
8807                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8808
8809         if (count)
8810                 seq_printf(m, ":count=%ld\n", *count);
8811         else
8812                 seq_puts(m, ":unlimited\n");
8813
8814         return 0;
8815 }
8816
8817 static int
8818 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8819                      unsigned long ip, void *init_data, void **data)
8820 {
8821         struct ftrace_func_mapper *mapper = *data;
8822
8823         if (!mapper) {
8824                 mapper = allocate_ftrace_func_mapper();
8825                 if (!mapper)
8826                         return -ENOMEM;
8827                 *data = mapper;
8828         }
8829
8830         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8831 }
8832
8833 static void
8834 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8835                      unsigned long ip, void *data)
8836 {
8837         struct ftrace_func_mapper *mapper = data;
8838
8839         if (!ip) {
8840                 if (!mapper)
8841                         return;
8842                 free_ftrace_func_mapper(mapper, NULL);
8843                 return;
8844         }
8845
8846         ftrace_func_mapper_remove_ip(mapper, ip);
8847 }
8848
8849 static struct ftrace_probe_ops snapshot_probe_ops = {
8850         .func                   = ftrace_snapshot,
8851         .print                  = ftrace_snapshot_print,
8852 };
8853
8854 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8855         .func                   = ftrace_count_snapshot,
8856         .print                  = ftrace_snapshot_print,
8857         .init                   = ftrace_snapshot_init,
8858         .free                   = ftrace_snapshot_free,
8859 };
8860
8861 static int
8862 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8863                                char *glob, char *cmd, char *param, int enable)
8864 {
8865         struct ftrace_probe_ops *ops;
8866         void *count = (void *)-1;
8867         char *number;
8868         int ret;
8869
8870         if (!tr)
8871                 return -ENODEV;
8872
8873         /* hash funcs only work with set_ftrace_filter */
8874         if (!enable)
8875                 return -EINVAL;
8876
8877         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8878
8879         if (glob[0] == '!')
8880                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8881
8882         if (!param)
8883                 goto out_reg;
8884
8885         number = strsep(&param, ":");
8886
8887         if (!strlen(number))
8888                 goto out_reg;
8889
8890         /*
8891          * We use the callback data field (which is a pointer)
8892          * as our counter.
8893          */
8894         ret = kstrtoul(number, 0, (unsigned long *)&count);
8895         if (ret)
8896                 return ret;
8897
8898  out_reg:
8899         ret = tracing_alloc_snapshot_instance(tr);
8900         if (ret < 0)
8901                 goto out;
8902
8903         ret = register_ftrace_function_probe(glob, tr, ops, count);
8904
8905  out:
8906         return ret < 0 ? ret : 0;
8907 }
8908
8909 static struct ftrace_func_command ftrace_snapshot_cmd = {
8910         .name                   = "snapshot",
8911         .func                   = ftrace_trace_snapshot_callback,
8912 };
8913
8914 static __init int register_snapshot_cmd(void)
8915 {
8916         return register_ftrace_command(&ftrace_snapshot_cmd);
8917 }
8918 #else
8919 static inline __init int register_snapshot_cmd(void) { return 0; }
8920 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8921
8922 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8923 {
8924         if (WARN_ON(!tr->dir))
8925                 return ERR_PTR(-ENODEV);
8926
8927         /* Top directory uses NULL as the parent */
8928         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8929                 return NULL;
8930
8931         /* All sub buffers have a descriptor */
8932         return tr->dir;
8933 }
8934
8935 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8936 {
8937         struct dentry *d_tracer;
8938
8939         if (tr->percpu_dir)
8940                 return tr->percpu_dir;
8941
8942         d_tracer = tracing_get_dentry(tr);
8943         if (IS_ERR(d_tracer))
8944                 return NULL;
8945
8946         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8947
8948         MEM_FAIL(!tr->percpu_dir,
8949                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8950
8951         return tr->percpu_dir;
8952 }
8953
8954 static struct dentry *
8955 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8956                       void *data, long cpu, const struct file_operations *fops)
8957 {
8958         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8959
8960         if (ret) /* See tracing_get_cpu() */
8961                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8962         return ret;
8963 }
8964
8965 static void
8966 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8967 {
8968         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8969         struct dentry *d_cpu;
8970         char cpu_dir[30]; /* 30 characters should be more than enough */
8971
8972         if (!d_percpu)
8973                 return;
8974
8975         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8976         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8977         if (!d_cpu) {
8978                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8979                 return;
8980         }
8981
8982         /* per cpu trace_pipe */
8983         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8984                                 tr, cpu, &tracing_pipe_fops);
8985
8986         /* per cpu trace */
8987         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8988                                 tr, cpu, &tracing_fops);
8989
8990         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8991                                 tr, cpu, &tracing_buffers_fops);
8992
8993         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8994                                 tr, cpu, &tracing_stats_fops);
8995
8996         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8997                                 tr, cpu, &tracing_entries_fops);
8998
8999 #ifdef CONFIG_TRACER_SNAPSHOT
9000         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9001                                 tr, cpu, &snapshot_fops);
9002
9003         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9004                                 tr, cpu, &snapshot_raw_fops);
9005 #endif
9006 }
9007
9008 #ifdef CONFIG_FTRACE_SELFTEST
9009 /* Let selftest have access to static functions in this file */
9010 #include "trace_selftest.c"
9011 #endif
9012
9013 static ssize_t
9014 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9015                         loff_t *ppos)
9016 {
9017         struct trace_option_dentry *topt = filp->private_data;
9018         char *buf;
9019
9020         if (topt->flags->val & topt->opt->bit)
9021                 buf = "1\n";
9022         else
9023                 buf = "0\n";
9024
9025         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9026 }
9027
9028 static ssize_t
9029 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9030                          loff_t *ppos)
9031 {
9032         struct trace_option_dentry *topt = filp->private_data;
9033         unsigned long val;
9034         int ret;
9035
9036         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9037         if (ret)
9038                 return ret;
9039
9040         if (val != 0 && val != 1)
9041                 return -EINVAL;
9042
9043         if (!!(topt->flags->val & topt->opt->bit) != val) {
9044                 mutex_lock(&trace_types_lock);
9045                 ret = __set_tracer_option(topt->tr, topt->flags,
9046                                           topt->opt, !val);
9047                 mutex_unlock(&trace_types_lock);
9048                 if (ret)
9049                         return ret;
9050         }
9051
9052         *ppos += cnt;
9053
9054         return cnt;
9055 }
9056
9057 static int tracing_open_options(struct inode *inode, struct file *filp)
9058 {
9059         struct trace_option_dentry *topt = inode->i_private;
9060         int ret;
9061
9062         ret = tracing_check_open_get_tr(topt->tr);
9063         if (ret)
9064                 return ret;
9065
9066         filp->private_data = inode->i_private;
9067         return 0;
9068 }
9069
9070 static int tracing_release_options(struct inode *inode, struct file *file)
9071 {
9072         struct trace_option_dentry *topt = file->private_data;
9073
9074         trace_array_put(topt->tr);
9075         return 0;
9076 }
9077
9078 static const struct file_operations trace_options_fops = {
9079         .open = tracing_open_options,
9080         .read = trace_options_read,
9081         .write = trace_options_write,
9082         .llseek = generic_file_llseek,
9083         .release = tracing_release_options,
9084 };
9085
9086 /*
9087  * In order to pass in both the trace_array descriptor as well as the index
9088  * to the flag that the trace option file represents, the trace_array
9089  * has a character array of trace_flags_index[], which holds the index
9090  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9091  * The address of this character array is passed to the flag option file
9092  * read/write callbacks.
9093  *
9094  * In order to extract both the index and the trace_array descriptor,
9095  * get_tr_index() uses the following algorithm.
9096  *
9097  *   idx = *ptr;
9098  *
9099  * As the pointer itself contains the address of the index (remember
9100  * index[1] == 1).
9101  *
9102  * Then to get the trace_array descriptor, by subtracting that index
9103  * from the ptr, we get to the start of the index itself.
9104  *
9105  *   ptr - idx == &index[0]
9106  *
9107  * Then a simple container_of() from that pointer gets us to the
9108  * trace_array descriptor.
9109  */
9110 static void get_tr_index(void *data, struct trace_array **ptr,
9111                          unsigned int *pindex)
9112 {
9113         *pindex = *(unsigned char *)data;
9114
9115         *ptr = container_of(data - *pindex, struct trace_array,
9116                             trace_flags_index);
9117 }
9118
9119 static ssize_t
9120 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9121                         loff_t *ppos)
9122 {
9123         void *tr_index = filp->private_data;
9124         struct trace_array *tr;
9125         unsigned int index;
9126         char *buf;
9127
9128         get_tr_index(tr_index, &tr, &index);
9129
9130         if (tr->trace_flags & (1 << index))
9131                 buf = "1\n";
9132         else
9133                 buf = "0\n";
9134
9135         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9136 }
9137
9138 static ssize_t
9139 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9140                          loff_t *ppos)
9141 {
9142         void *tr_index = filp->private_data;
9143         struct trace_array *tr;
9144         unsigned int index;
9145         unsigned long val;
9146         int ret;
9147
9148         get_tr_index(tr_index, &tr, &index);
9149
9150         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9151         if (ret)
9152                 return ret;
9153
9154         if (val != 0 && val != 1)
9155                 return -EINVAL;
9156
9157         mutex_lock(&event_mutex);
9158         mutex_lock(&trace_types_lock);
9159         ret = set_tracer_flag(tr, 1 << index, val);
9160         mutex_unlock(&trace_types_lock);
9161         mutex_unlock(&event_mutex);
9162
9163         if (ret < 0)
9164                 return ret;
9165
9166         *ppos += cnt;
9167
9168         return cnt;
9169 }
9170
9171 static const struct file_operations trace_options_core_fops = {
9172         .open = tracing_open_generic,
9173         .read = trace_options_core_read,
9174         .write = trace_options_core_write,
9175         .llseek = generic_file_llseek,
9176 };
9177
9178 struct dentry *trace_create_file(const char *name,
9179                                  umode_t mode,
9180                                  struct dentry *parent,
9181                                  void *data,
9182                                  const struct file_operations *fops)
9183 {
9184         struct dentry *ret;
9185
9186         ret = tracefs_create_file(name, mode, parent, data, fops);
9187         if (!ret)
9188                 pr_warn("Could not create tracefs '%s' entry\n", name);
9189
9190         return ret;
9191 }
9192
9193
9194 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9195 {
9196         struct dentry *d_tracer;
9197
9198         if (tr->options)
9199                 return tr->options;
9200
9201         d_tracer = tracing_get_dentry(tr);
9202         if (IS_ERR(d_tracer))
9203                 return NULL;
9204
9205         tr->options = tracefs_create_dir("options", d_tracer);
9206         if (!tr->options) {
9207                 pr_warn("Could not create tracefs directory 'options'\n");
9208                 return NULL;
9209         }
9210
9211         return tr->options;
9212 }
9213
9214 static void
9215 create_trace_option_file(struct trace_array *tr,
9216                          struct trace_option_dentry *topt,
9217                          struct tracer_flags *flags,
9218                          struct tracer_opt *opt)
9219 {
9220         struct dentry *t_options;
9221
9222         t_options = trace_options_init_dentry(tr);
9223         if (!t_options)
9224                 return;
9225
9226         topt->flags = flags;
9227         topt->opt = opt;
9228         topt->tr = tr;
9229
9230         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9231                                         t_options, topt, &trace_options_fops);
9232
9233 }
9234
9235 static void
9236 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9237 {
9238         struct trace_option_dentry *topts;
9239         struct trace_options *tr_topts;
9240         struct tracer_flags *flags;
9241         struct tracer_opt *opts;
9242         int cnt;
9243         int i;
9244
9245         if (!tracer)
9246                 return;
9247
9248         flags = tracer->flags;
9249
9250         if (!flags || !flags->opts)
9251                 return;
9252
9253         /*
9254          * If this is an instance, only create flags for tracers
9255          * the instance may have.
9256          */
9257         if (!trace_ok_for_array(tracer, tr))
9258                 return;
9259
9260         for (i = 0; i < tr->nr_topts; i++) {
9261                 /* Make sure there's no duplicate flags. */
9262                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9263                         return;
9264         }
9265
9266         opts = flags->opts;
9267
9268         for (cnt = 0; opts[cnt].name; cnt++)
9269                 ;
9270
9271         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9272         if (!topts)
9273                 return;
9274
9275         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9276                             GFP_KERNEL);
9277         if (!tr_topts) {
9278                 kfree(topts);
9279                 return;
9280         }
9281
9282         tr->topts = tr_topts;
9283         tr->topts[tr->nr_topts].tracer = tracer;
9284         tr->topts[tr->nr_topts].topts = topts;
9285         tr->nr_topts++;
9286
9287         for (cnt = 0; opts[cnt].name; cnt++) {
9288                 create_trace_option_file(tr, &topts[cnt], flags,
9289                                          &opts[cnt]);
9290                 MEM_FAIL(topts[cnt].entry == NULL,
9291                           "Failed to create trace option: %s",
9292                           opts[cnt].name);
9293         }
9294 }
9295
9296 static struct dentry *
9297 create_trace_option_core_file(struct trace_array *tr,
9298                               const char *option, long index)
9299 {
9300         struct dentry *t_options;
9301
9302         t_options = trace_options_init_dentry(tr);
9303         if (!t_options)
9304                 return NULL;
9305
9306         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9307                                  (void *)&tr->trace_flags_index[index],
9308                                  &trace_options_core_fops);
9309 }
9310
9311 static void create_trace_options_dir(struct trace_array *tr)
9312 {
9313         struct dentry *t_options;
9314         bool top_level = tr == &global_trace;
9315         int i;
9316
9317         t_options = trace_options_init_dentry(tr);
9318         if (!t_options)
9319                 return;
9320
9321         for (i = 0; trace_options[i]; i++) {
9322                 if (top_level ||
9323                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9324                         create_trace_option_core_file(tr, trace_options[i], i);
9325         }
9326 }
9327
9328 static ssize_t
9329 rb_simple_read(struct file *filp, char __user *ubuf,
9330                size_t cnt, loff_t *ppos)
9331 {
9332         struct trace_array *tr = filp->private_data;
9333         char buf[64];
9334         int r;
9335
9336         r = tracer_tracing_is_on(tr);
9337         r = sprintf(buf, "%d\n", r);
9338
9339         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9340 }
9341
9342 static ssize_t
9343 rb_simple_write(struct file *filp, const char __user *ubuf,
9344                 size_t cnt, loff_t *ppos)
9345 {
9346         struct trace_array *tr = filp->private_data;
9347         struct trace_buffer *buffer = tr->array_buffer.buffer;
9348         unsigned long val;
9349         int ret;
9350
9351         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9352         if (ret)
9353                 return ret;
9354
9355         if (buffer) {
9356                 mutex_lock(&trace_types_lock);
9357                 if (!!val == tracer_tracing_is_on(tr)) {
9358                         val = 0; /* do nothing */
9359                 } else if (val) {
9360                         tracer_tracing_on(tr);
9361                         if (tr->current_trace->start)
9362                                 tr->current_trace->start(tr);
9363                 } else {
9364                         tracer_tracing_off(tr);
9365                         if (tr->current_trace->stop)
9366                                 tr->current_trace->stop(tr);
9367                         /* Wake up any waiters */
9368                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9369                 }
9370                 mutex_unlock(&trace_types_lock);
9371         }
9372
9373         (*ppos)++;
9374
9375         return cnt;
9376 }
9377
9378 static const struct file_operations rb_simple_fops = {
9379         .open           = tracing_open_generic_tr,
9380         .read           = rb_simple_read,
9381         .write          = rb_simple_write,
9382         .release        = tracing_release_generic_tr,
9383         .llseek         = default_llseek,
9384 };
9385
9386 static ssize_t
9387 buffer_percent_read(struct file *filp, char __user *ubuf,
9388                     size_t cnt, loff_t *ppos)
9389 {
9390         struct trace_array *tr = filp->private_data;
9391         char buf[64];
9392         int r;
9393
9394         r = tr->buffer_percent;
9395         r = sprintf(buf, "%d\n", r);
9396
9397         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9398 }
9399
9400 static ssize_t
9401 buffer_percent_write(struct file *filp, const char __user *ubuf,
9402                      size_t cnt, loff_t *ppos)
9403 {
9404         struct trace_array *tr = filp->private_data;
9405         unsigned long val;
9406         int ret;
9407
9408         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9409         if (ret)
9410                 return ret;
9411
9412         if (val > 100)
9413                 return -EINVAL;
9414
9415         tr->buffer_percent = val;
9416
9417         (*ppos)++;
9418
9419         return cnt;
9420 }
9421
9422 static const struct file_operations buffer_percent_fops = {
9423         .open           = tracing_open_generic_tr,
9424         .read           = buffer_percent_read,
9425         .write          = buffer_percent_write,
9426         .release        = tracing_release_generic_tr,
9427         .llseek         = default_llseek,
9428 };
9429
9430 static ssize_t
9431 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9432 {
9433         struct trace_array *tr = filp->private_data;
9434         size_t size;
9435         char buf[64];
9436         int order;
9437         int r;
9438
9439         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9440         size = (PAGE_SIZE << order) / 1024;
9441
9442         r = sprintf(buf, "%zd\n", size);
9443
9444         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9445 }
9446
9447 static ssize_t
9448 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9449                          size_t cnt, loff_t *ppos)
9450 {
9451         struct trace_array *tr = filp->private_data;
9452         unsigned long val;
9453         int old_order;
9454         int order;
9455         int pages;
9456         int ret;
9457
9458         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9459         if (ret)
9460                 return ret;
9461
9462         val *= 1024; /* value passed in is in KB */
9463
9464         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9465         order = fls(pages - 1);
9466
9467         /* limit between 1 and 128 system pages */
9468         if (order < 0 || order > 7)
9469                 return -EINVAL;
9470
9471         /* Do not allow tracing while changing the order of the ring buffer */
9472         tracing_stop_tr(tr);
9473
9474         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9475         if (old_order == order)
9476                 goto out;
9477
9478         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9479         if (ret)
9480                 goto out;
9481
9482 #ifdef CONFIG_TRACER_MAX_TRACE
9483
9484         if (!tr->allocated_snapshot)
9485                 goto out_max;
9486
9487         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9488         if (ret) {
9489                 /* Put back the old order */
9490                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9491                 if (WARN_ON_ONCE(cnt)) {
9492                         /*
9493                          * AARGH! We are left with different orders!
9494                          * The max buffer is our "snapshot" buffer.
9495                          * When a tracer needs a snapshot (one of the
9496                          * latency tracers), it swaps the max buffer
9497                          * with the saved snap shot. We succeeded to
9498                          * update the order of the main buffer, but failed to
9499                          * update the order of the max buffer. But when we tried
9500                          * to reset the main buffer to the original size, we
9501                          * failed there too. This is very unlikely to
9502                          * happen, but if it does, warn and kill all
9503                          * tracing.
9504                          */
9505                         tracing_disabled = 1;
9506                 }
9507                 goto out;
9508         }
9509  out_max:
9510 #endif
9511         (*ppos)++;
9512  out:
9513         if (ret)
9514                 cnt = ret;
9515         tracing_start_tr(tr);
9516         return cnt;
9517 }
9518
9519 static const struct file_operations buffer_subbuf_size_fops = {
9520         .open           = tracing_open_generic_tr,
9521         .read           = buffer_subbuf_size_read,
9522         .write          = buffer_subbuf_size_write,
9523         .release        = tracing_release_generic_tr,
9524         .llseek         = default_llseek,
9525 };
9526
9527 static struct dentry *trace_instance_dir;
9528
9529 static void
9530 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9531
9532 static int
9533 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9534 {
9535         enum ring_buffer_flags rb_flags;
9536
9537         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9538
9539         buf->tr = tr;
9540
9541         buf->buffer = ring_buffer_alloc(size, rb_flags);
9542         if (!buf->buffer)
9543                 return -ENOMEM;
9544
9545         buf->data = alloc_percpu(struct trace_array_cpu);
9546         if (!buf->data) {
9547                 ring_buffer_free(buf->buffer);
9548                 buf->buffer = NULL;
9549                 return -ENOMEM;
9550         }
9551
9552         /* Allocate the first page for all buffers */
9553         set_buffer_entries(&tr->array_buffer,
9554                            ring_buffer_size(tr->array_buffer.buffer, 0));
9555
9556         return 0;
9557 }
9558
9559 static void free_trace_buffer(struct array_buffer *buf)
9560 {
9561         if (buf->buffer) {
9562                 ring_buffer_free(buf->buffer);
9563                 buf->buffer = NULL;
9564                 free_percpu(buf->data);
9565                 buf->data = NULL;
9566         }
9567 }
9568
9569 static int allocate_trace_buffers(struct trace_array *tr, int size)
9570 {
9571         int ret;
9572
9573         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9574         if (ret)
9575                 return ret;
9576
9577 #ifdef CONFIG_TRACER_MAX_TRACE
9578         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9579                                     allocate_snapshot ? size : 1);
9580         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9581                 free_trace_buffer(&tr->array_buffer);
9582                 return -ENOMEM;
9583         }
9584         tr->allocated_snapshot = allocate_snapshot;
9585
9586         allocate_snapshot = false;
9587 #endif
9588
9589         return 0;
9590 }
9591
9592 static void free_trace_buffers(struct trace_array *tr)
9593 {
9594         if (!tr)
9595                 return;
9596
9597         free_trace_buffer(&tr->array_buffer);
9598
9599 #ifdef CONFIG_TRACER_MAX_TRACE
9600         free_trace_buffer(&tr->max_buffer);
9601 #endif
9602 }
9603
9604 static void init_trace_flags_index(struct trace_array *tr)
9605 {
9606         int i;
9607
9608         /* Used by the trace options files */
9609         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9610                 tr->trace_flags_index[i] = i;
9611 }
9612
9613 static void __update_tracer_options(struct trace_array *tr)
9614 {
9615         struct tracer *t;
9616
9617         for (t = trace_types; t; t = t->next)
9618                 add_tracer_options(tr, t);
9619 }
9620
9621 static void update_tracer_options(struct trace_array *tr)
9622 {
9623         mutex_lock(&trace_types_lock);
9624         tracer_options_updated = true;
9625         __update_tracer_options(tr);
9626         mutex_unlock(&trace_types_lock);
9627 }
9628
9629 /* Must have trace_types_lock held */
9630 struct trace_array *trace_array_find(const char *instance)
9631 {
9632         struct trace_array *tr, *found = NULL;
9633
9634         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9635                 if (tr->name && strcmp(tr->name, instance) == 0) {
9636                         found = tr;
9637                         break;
9638                 }
9639         }
9640
9641         return found;
9642 }
9643
9644 struct trace_array *trace_array_find_get(const char *instance)
9645 {
9646         struct trace_array *tr;
9647
9648         mutex_lock(&trace_types_lock);
9649         tr = trace_array_find(instance);
9650         if (tr)
9651                 tr->ref++;
9652         mutex_unlock(&trace_types_lock);
9653
9654         return tr;
9655 }
9656
9657 static int trace_array_create_dir(struct trace_array *tr)
9658 {
9659         int ret;
9660
9661         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9662         if (!tr->dir)
9663                 return -EINVAL;
9664
9665         ret = event_trace_add_tracer(tr->dir, tr);
9666         if (ret) {
9667                 tracefs_remove(tr->dir);
9668                 return ret;
9669         }
9670
9671         init_tracer_tracefs(tr, tr->dir);
9672         __update_tracer_options(tr);
9673
9674         return ret;
9675 }
9676
9677 static struct trace_array *
9678 trace_array_create_systems(const char *name, const char *systems)
9679 {
9680         struct trace_array *tr;
9681         int ret;
9682
9683         ret = -ENOMEM;
9684         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9685         if (!tr)
9686                 return ERR_PTR(ret);
9687
9688         tr->name = kstrdup(name, GFP_KERNEL);
9689         if (!tr->name)
9690                 goto out_free_tr;
9691
9692         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9693                 goto out_free_tr;
9694
9695         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9696                 goto out_free_tr;
9697
9698         if (systems) {
9699                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9700                 if (!tr->system_names)
9701                         goto out_free_tr;
9702         }
9703
9704         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9705
9706         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9707
9708         raw_spin_lock_init(&tr->start_lock);
9709
9710         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9711
9712         tr->current_trace = &nop_trace;
9713
9714         INIT_LIST_HEAD(&tr->systems);
9715         INIT_LIST_HEAD(&tr->events);
9716         INIT_LIST_HEAD(&tr->hist_vars);
9717         INIT_LIST_HEAD(&tr->err_log);
9718
9719         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9720                 goto out_free_tr;
9721
9722         /* The ring buffer is defaultly expanded */
9723         trace_set_ring_buffer_expanded(tr);
9724
9725         if (ftrace_allocate_ftrace_ops(tr) < 0)
9726                 goto out_free_tr;
9727
9728         ftrace_init_trace_array(tr);
9729
9730         init_trace_flags_index(tr);
9731
9732         if (trace_instance_dir) {
9733                 ret = trace_array_create_dir(tr);
9734                 if (ret)
9735                         goto out_free_tr;
9736         } else
9737                 __trace_early_add_events(tr);
9738
9739         list_add(&tr->list, &ftrace_trace_arrays);
9740
9741         tr->ref++;
9742
9743         return tr;
9744
9745  out_free_tr:
9746         ftrace_free_ftrace_ops(tr);
9747         free_trace_buffers(tr);
9748         free_cpumask_var(tr->pipe_cpumask);
9749         free_cpumask_var(tr->tracing_cpumask);
9750         kfree_const(tr->system_names);
9751         kfree(tr->name);
9752         kfree(tr);
9753
9754         return ERR_PTR(ret);
9755 }
9756
9757 static struct trace_array *trace_array_create(const char *name)
9758 {
9759         return trace_array_create_systems(name, NULL);
9760 }
9761
9762 static int instance_mkdir(const char *name)
9763 {
9764         struct trace_array *tr;
9765         int ret;
9766
9767         mutex_lock(&event_mutex);
9768         mutex_lock(&trace_types_lock);
9769
9770         ret = -EEXIST;
9771         if (trace_array_find(name))
9772                 goto out_unlock;
9773
9774         tr = trace_array_create(name);
9775
9776         ret = PTR_ERR_OR_ZERO(tr);
9777
9778 out_unlock:
9779         mutex_unlock(&trace_types_lock);
9780         mutex_unlock(&event_mutex);
9781         return ret;
9782 }
9783
9784 /**
9785  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9786  * @name: The name of the trace array to be looked up/created.
9787  * @systems: A list of systems to create event directories for (NULL for all)
9788  *
9789  * Returns pointer to trace array with given name.
9790  * NULL, if it cannot be created.
9791  *
9792  * NOTE: This function increments the reference counter associated with the
9793  * trace array returned. This makes sure it cannot be freed while in use.
9794  * Use trace_array_put() once the trace array is no longer needed.
9795  * If the trace_array is to be freed, trace_array_destroy() needs to
9796  * be called after the trace_array_put(), or simply let user space delete
9797  * it from the tracefs instances directory. But until the
9798  * trace_array_put() is called, user space can not delete it.
9799  *
9800  */
9801 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9802 {
9803         struct trace_array *tr;
9804
9805         mutex_lock(&event_mutex);
9806         mutex_lock(&trace_types_lock);
9807
9808         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9809                 if (tr->name && strcmp(tr->name, name) == 0)
9810                         goto out_unlock;
9811         }
9812
9813         tr = trace_array_create_systems(name, systems);
9814
9815         if (IS_ERR(tr))
9816                 tr = NULL;
9817 out_unlock:
9818         if (tr)
9819                 tr->ref++;
9820
9821         mutex_unlock(&trace_types_lock);
9822         mutex_unlock(&event_mutex);
9823         return tr;
9824 }
9825 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9826
9827 static int __remove_instance(struct trace_array *tr)
9828 {
9829         int i;
9830
9831         /* Reference counter for a newly created trace array = 1. */
9832         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9833                 return -EBUSY;
9834
9835         list_del(&tr->list);
9836
9837         /* Disable all the flags that were enabled coming in */
9838         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9839                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9840                         set_tracer_flag(tr, 1 << i, 0);
9841         }
9842
9843         tracing_set_nop(tr);
9844         clear_ftrace_function_probes(tr);
9845         event_trace_del_tracer(tr);
9846         ftrace_clear_pids(tr);
9847         ftrace_destroy_function_files(tr);
9848         tracefs_remove(tr->dir);
9849         free_percpu(tr->last_func_repeats);
9850         free_trace_buffers(tr);
9851         clear_tracing_err_log(tr);
9852
9853         for (i = 0; i < tr->nr_topts; i++) {
9854                 kfree(tr->topts[i].topts);
9855         }
9856         kfree(tr->topts);
9857
9858         free_cpumask_var(tr->pipe_cpumask);
9859         free_cpumask_var(tr->tracing_cpumask);
9860         kfree_const(tr->system_names);
9861         kfree(tr->name);
9862         kfree(tr);
9863
9864         return 0;
9865 }
9866
9867 int trace_array_destroy(struct trace_array *this_tr)
9868 {
9869         struct trace_array *tr;
9870         int ret;
9871
9872         if (!this_tr)
9873                 return -EINVAL;
9874
9875         mutex_lock(&event_mutex);
9876         mutex_lock(&trace_types_lock);
9877
9878         ret = -ENODEV;
9879
9880         /* Making sure trace array exists before destroying it. */
9881         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9882                 if (tr == this_tr) {
9883                         ret = __remove_instance(tr);
9884                         break;
9885                 }
9886         }
9887
9888         mutex_unlock(&trace_types_lock);
9889         mutex_unlock(&event_mutex);
9890
9891         return ret;
9892 }
9893 EXPORT_SYMBOL_GPL(trace_array_destroy);
9894
9895 static int instance_rmdir(const char *name)
9896 {
9897         struct trace_array *tr;
9898         int ret;
9899
9900         mutex_lock(&event_mutex);
9901         mutex_lock(&trace_types_lock);
9902
9903         ret = -ENODEV;
9904         tr = trace_array_find(name);
9905         if (tr)
9906                 ret = __remove_instance(tr);
9907
9908         mutex_unlock(&trace_types_lock);
9909         mutex_unlock(&event_mutex);
9910
9911         return ret;
9912 }
9913
9914 static __init void create_trace_instances(struct dentry *d_tracer)
9915 {
9916         struct trace_array *tr;
9917
9918         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9919                                                          instance_mkdir,
9920                                                          instance_rmdir);
9921         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9922                 return;
9923
9924         mutex_lock(&event_mutex);
9925         mutex_lock(&trace_types_lock);
9926
9927         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9928                 if (!tr->name)
9929                         continue;
9930                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9931                              "Failed to create instance directory\n"))
9932                         break;
9933         }
9934
9935         mutex_unlock(&trace_types_lock);
9936         mutex_unlock(&event_mutex);
9937 }
9938
9939 static void
9940 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9941 {
9942         int cpu;
9943
9944         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9945                         tr, &show_traces_fops);
9946
9947         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9948                         tr, &set_tracer_fops);
9949
9950         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9951                           tr, &tracing_cpumask_fops);
9952
9953         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9954                           tr, &tracing_iter_fops);
9955
9956         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9957                           tr, &tracing_fops);
9958
9959         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9960                           tr, &tracing_pipe_fops);
9961
9962         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9963                           tr, &tracing_entries_fops);
9964
9965         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9966                           tr, &tracing_total_entries_fops);
9967
9968         trace_create_file("free_buffer", 0200, d_tracer,
9969                           tr, &tracing_free_buffer_fops);
9970
9971         trace_create_file("trace_marker", 0220, d_tracer,
9972                           tr, &tracing_mark_fops);
9973
9974         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9975
9976         trace_create_file("trace_marker_raw", 0220, d_tracer,
9977                           tr, &tracing_mark_raw_fops);
9978
9979         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9980                           &trace_clock_fops);
9981
9982         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9983                           tr, &rb_simple_fops);
9984
9985         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9986                           &trace_time_stamp_mode_fops);
9987
9988         tr->buffer_percent = 50;
9989
9990         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9991                         tr, &buffer_percent_fops);
9992
9993         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9994                           tr, &buffer_subbuf_size_fops);
9995
9996         create_trace_options_dir(tr);
9997
9998 #ifdef CONFIG_TRACER_MAX_TRACE
9999         trace_create_maxlat_file(tr, d_tracer);
10000 #endif
10001
10002         if (ftrace_create_function_files(tr, d_tracer))
10003                 MEM_FAIL(1, "Could not allocate function filter files");
10004
10005 #ifdef CONFIG_TRACER_SNAPSHOT
10006         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10007                           tr, &snapshot_fops);
10008 #endif
10009
10010         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10011                           tr, &tracing_err_log_fops);
10012
10013         for_each_tracing_cpu(cpu)
10014                 tracing_init_tracefs_percpu(tr, cpu);
10015
10016         ftrace_init_tracefs(tr, d_tracer);
10017 }
10018
10019 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10020 {
10021         struct vfsmount *mnt;
10022         struct file_system_type *type;
10023
10024         /*
10025          * To maintain backward compatibility for tools that mount
10026          * debugfs to get to the tracing facility, tracefs is automatically
10027          * mounted to the debugfs/tracing directory.
10028          */
10029         type = get_fs_type("tracefs");
10030         if (!type)
10031                 return NULL;
10032         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10033         put_filesystem(type);
10034         if (IS_ERR(mnt))
10035                 return NULL;
10036         mntget(mnt);
10037
10038         return mnt;
10039 }
10040
10041 /**
10042  * tracing_init_dentry - initialize top level trace array
10043  *
10044  * This is called when creating files or directories in the tracing
10045  * directory. It is called via fs_initcall() by any of the boot up code
10046  * and expects to return the dentry of the top level tracing directory.
10047  */
10048 int tracing_init_dentry(void)
10049 {
10050         struct trace_array *tr = &global_trace;
10051
10052         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10053                 pr_warn("Tracing disabled due to lockdown\n");
10054                 return -EPERM;
10055         }
10056
10057         /* The top level trace array uses  NULL as parent */
10058         if (tr->dir)
10059                 return 0;
10060
10061         if (WARN_ON(!tracefs_initialized()))
10062                 return -ENODEV;
10063
10064         /*
10065          * As there may still be users that expect the tracing
10066          * files to exist in debugfs/tracing, we must automount
10067          * the tracefs file system there, so older tools still
10068          * work with the newer kernel.
10069          */
10070         tr->dir = debugfs_create_automount("tracing", NULL,
10071                                            trace_automount, NULL);
10072
10073         return 0;
10074 }
10075
10076 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10077 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10078
10079 static struct workqueue_struct *eval_map_wq __initdata;
10080 static struct work_struct eval_map_work __initdata;
10081 static struct work_struct tracerfs_init_work __initdata;
10082
10083 static void __init eval_map_work_func(struct work_struct *work)
10084 {
10085         int len;
10086
10087         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10088         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10089 }
10090
10091 static int __init trace_eval_init(void)
10092 {
10093         INIT_WORK(&eval_map_work, eval_map_work_func);
10094
10095         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10096         if (!eval_map_wq) {
10097                 pr_err("Unable to allocate eval_map_wq\n");
10098                 /* Do work here */
10099                 eval_map_work_func(&eval_map_work);
10100                 return -ENOMEM;
10101         }
10102
10103         queue_work(eval_map_wq, &eval_map_work);
10104         return 0;
10105 }
10106
10107 subsys_initcall(trace_eval_init);
10108
10109 static int __init trace_eval_sync(void)
10110 {
10111         /* Make sure the eval map updates are finished */
10112         if (eval_map_wq)
10113                 destroy_workqueue(eval_map_wq);
10114         return 0;
10115 }
10116
10117 late_initcall_sync(trace_eval_sync);
10118
10119
10120 #ifdef CONFIG_MODULES
10121 static void trace_module_add_evals(struct module *mod)
10122 {
10123         if (!mod->num_trace_evals)
10124                 return;
10125
10126         /*
10127          * Modules with bad taint do not have events created, do
10128          * not bother with enums either.
10129          */
10130         if (trace_module_has_bad_taint(mod))
10131                 return;
10132
10133         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10134 }
10135
10136 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10137 static void trace_module_remove_evals(struct module *mod)
10138 {
10139         union trace_eval_map_item *map;
10140         union trace_eval_map_item **last = &trace_eval_maps;
10141
10142         if (!mod->num_trace_evals)
10143                 return;
10144
10145         mutex_lock(&trace_eval_mutex);
10146
10147         map = trace_eval_maps;
10148
10149         while (map) {
10150                 if (map->head.mod == mod)
10151                         break;
10152                 map = trace_eval_jmp_to_tail(map);
10153                 last = &map->tail.next;
10154                 map = map->tail.next;
10155         }
10156         if (!map)
10157                 goto out;
10158
10159         *last = trace_eval_jmp_to_tail(map)->tail.next;
10160         kfree(map);
10161  out:
10162         mutex_unlock(&trace_eval_mutex);
10163 }
10164 #else
10165 static inline void trace_module_remove_evals(struct module *mod) { }
10166 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10167
10168 static int trace_module_notify(struct notifier_block *self,
10169                                unsigned long val, void *data)
10170 {
10171         struct module *mod = data;
10172
10173         switch (val) {
10174         case MODULE_STATE_COMING:
10175                 trace_module_add_evals(mod);
10176                 break;
10177         case MODULE_STATE_GOING:
10178                 trace_module_remove_evals(mod);
10179                 break;
10180         }
10181
10182         return NOTIFY_OK;
10183 }
10184
10185 static struct notifier_block trace_module_nb = {
10186         .notifier_call = trace_module_notify,
10187         .priority = 0,
10188 };
10189 #endif /* CONFIG_MODULES */
10190
10191 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10192 {
10193
10194         event_trace_init();
10195
10196         init_tracer_tracefs(&global_trace, NULL);
10197         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10198
10199         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10200                         &global_trace, &tracing_thresh_fops);
10201
10202         trace_create_file("README", TRACE_MODE_READ, NULL,
10203                         NULL, &tracing_readme_fops);
10204
10205         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10206                         NULL, &tracing_saved_cmdlines_fops);
10207
10208         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10209                           NULL, &tracing_saved_cmdlines_size_fops);
10210
10211         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10212                         NULL, &tracing_saved_tgids_fops);
10213
10214         trace_create_eval_file(NULL);
10215
10216 #ifdef CONFIG_MODULES
10217         register_module_notifier(&trace_module_nb);
10218 #endif
10219
10220 #ifdef CONFIG_DYNAMIC_FTRACE
10221         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10222                         NULL, &tracing_dyn_info_fops);
10223 #endif
10224
10225         create_trace_instances(NULL);
10226
10227         update_tracer_options(&global_trace);
10228 }
10229
10230 static __init int tracer_init_tracefs(void)
10231 {
10232         int ret;
10233
10234         trace_access_lock_init();
10235
10236         ret = tracing_init_dentry();
10237         if (ret)
10238                 return 0;
10239
10240         if (eval_map_wq) {
10241                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10242                 queue_work(eval_map_wq, &tracerfs_init_work);
10243         } else {
10244                 tracer_init_tracefs_work_func(NULL);
10245         }
10246
10247         rv_init_interface();
10248
10249         return 0;
10250 }
10251
10252 fs_initcall(tracer_init_tracefs);
10253
10254 static int trace_die_panic_handler(struct notifier_block *self,
10255                                 unsigned long ev, void *unused);
10256
10257 static struct notifier_block trace_panic_notifier = {
10258         .notifier_call = trace_die_panic_handler,
10259         .priority = INT_MAX - 1,
10260 };
10261
10262 static struct notifier_block trace_die_notifier = {
10263         .notifier_call = trace_die_panic_handler,
10264         .priority = INT_MAX - 1,
10265 };
10266
10267 /*
10268  * The idea is to execute the following die/panic callback early, in order
10269  * to avoid showing irrelevant information in the trace (like other panic
10270  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10271  * warnings get disabled (to prevent potential log flooding).
10272  */
10273 static int trace_die_panic_handler(struct notifier_block *self,
10274                                 unsigned long ev, void *unused)
10275 {
10276         if (!ftrace_dump_on_oops)
10277                 return NOTIFY_DONE;
10278
10279         /* The die notifier requires DIE_OOPS to trigger */
10280         if (self == &trace_die_notifier && ev != DIE_OOPS)
10281                 return NOTIFY_DONE;
10282
10283         ftrace_dump(ftrace_dump_on_oops);
10284
10285         return NOTIFY_DONE;
10286 }
10287
10288 /*
10289  * printk is set to max of 1024, we really don't need it that big.
10290  * Nothing should be printing 1000 characters anyway.
10291  */
10292 #define TRACE_MAX_PRINT         1000
10293
10294 /*
10295  * Define here KERN_TRACE so that we have one place to modify
10296  * it if we decide to change what log level the ftrace dump
10297  * should be at.
10298  */
10299 #define KERN_TRACE              KERN_EMERG
10300
10301 void
10302 trace_printk_seq(struct trace_seq *s)
10303 {
10304         /* Probably should print a warning here. */
10305         if (s->seq.len >= TRACE_MAX_PRINT)
10306                 s->seq.len = TRACE_MAX_PRINT;
10307
10308         /*
10309          * More paranoid code. Although the buffer size is set to
10310          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10311          * an extra layer of protection.
10312          */
10313         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10314                 s->seq.len = s->seq.size - 1;
10315
10316         /* should be zero ended, but we are paranoid. */
10317         s->buffer[s->seq.len] = 0;
10318
10319         printk(KERN_TRACE "%s", s->buffer);
10320
10321         trace_seq_init(s);
10322 }
10323
10324 void trace_init_global_iter(struct trace_iterator *iter)
10325 {
10326         iter->tr = &global_trace;
10327         iter->trace = iter->tr->current_trace;
10328         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10329         iter->array_buffer = &global_trace.array_buffer;
10330
10331         if (iter->trace && iter->trace->open)
10332                 iter->trace->open(iter);
10333
10334         /* Annotate start of buffers if we had overruns */
10335         if (ring_buffer_overruns(iter->array_buffer->buffer))
10336                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10337
10338         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10339         if (trace_clocks[iter->tr->clock_id].in_ns)
10340                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10341
10342         /* Can not use kmalloc for iter.temp and iter.fmt */
10343         iter->temp = static_temp_buf;
10344         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10345         iter->fmt = static_fmt_buf;
10346         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10347 }
10348
10349 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10350 {
10351         /* use static because iter can be a bit big for the stack */
10352         static struct trace_iterator iter;
10353         static atomic_t dump_running;
10354         struct trace_array *tr = &global_trace;
10355         unsigned int old_userobj;
10356         unsigned long flags;
10357         int cnt = 0, cpu;
10358
10359         /* Only allow one dump user at a time. */
10360         if (atomic_inc_return(&dump_running) != 1) {
10361                 atomic_dec(&dump_running);
10362                 return;
10363         }
10364
10365         /*
10366          * Always turn off tracing when we dump.
10367          * We don't need to show trace output of what happens
10368          * between multiple crashes.
10369          *
10370          * If the user does a sysrq-z, then they can re-enable
10371          * tracing with echo 1 > tracing_on.
10372          */
10373         tracing_off();
10374
10375         local_irq_save(flags);
10376
10377         /* Simulate the iterator */
10378         trace_init_global_iter(&iter);
10379
10380         for_each_tracing_cpu(cpu) {
10381                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10382         }
10383
10384         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10385
10386         /* don't look at user memory in panic mode */
10387         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10388
10389         switch (oops_dump_mode) {
10390         case DUMP_ALL:
10391                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10392                 break;
10393         case DUMP_ORIG:
10394                 iter.cpu_file = raw_smp_processor_id();
10395                 break;
10396         case DUMP_NONE:
10397                 goto out_enable;
10398         default:
10399                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10400                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10401         }
10402
10403         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10404
10405         /* Did function tracer already get disabled? */
10406         if (ftrace_is_dead()) {
10407                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10408                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10409         }
10410
10411         /*
10412          * We need to stop all tracing on all CPUS to read
10413          * the next buffer. This is a bit expensive, but is
10414          * not done often. We fill all what we can read,
10415          * and then release the locks again.
10416          */
10417
10418         while (!trace_empty(&iter)) {
10419
10420                 if (!cnt)
10421                         printk(KERN_TRACE "---------------------------------\n");
10422
10423                 cnt++;
10424
10425                 trace_iterator_reset(&iter);
10426                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10427
10428                 if (trace_find_next_entry_inc(&iter) != NULL) {
10429                         int ret;
10430
10431                         ret = print_trace_line(&iter);
10432                         if (ret != TRACE_TYPE_NO_CONSUME)
10433                                 trace_consume(&iter);
10434                 }
10435                 touch_nmi_watchdog();
10436
10437                 trace_printk_seq(&iter.seq);
10438         }
10439
10440         if (!cnt)
10441                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10442         else
10443                 printk(KERN_TRACE "---------------------------------\n");
10444
10445  out_enable:
10446         tr->trace_flags |= old_userobj;
10447
10448         for_each_tracing_cpu(cpu) {
10449                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10450         }
10451         atomic_dec(&dump_running);
10452         local_irq_restore(flags);
10453 }
10454 EXPORT_SYMBOL_GPL(ftrace_dump);
10455
10456 #define WRITE_BUFSIZE  4096
10457
10458 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10459                                 size_t count, loff_t *ppos,
10460                                 int (*createfn)(const char *))
10461 {
10462         char *kbuf, *buf, *tmp;
10463         int ret = 0;
10464         size_t done = 0;
10465         size_t size;
10466
10467         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10468         if (!kbuf)
10469                 return -ENOMEM;
10470
10471         while (done < count) {
10472                 size = count - done;
10473
10474                 if (size >= WRITE_BUFSIZE)
10475                         size = WRITE_BUFSIZE - 1;
10476
10477                 if (copy_from_user(kbuf, buffer + done, size)) {
10478                         ret = -EFAULT;
10479                         goto out;
10480                 }
10481                 kbuf[size] = '\0';
10482                 buf = kbuf;
10483                 do {
10484                         tmp = strchr(buf, '\n');
10485                         if (tmp) {
10486                                 *tmp = '\0';
10487                                 size = tmp - buf + 1;
10488                         } else {
10489                                 size = strlen(buf);
10490                                 if (done + size < count) {
10491                                         if (buf != kbuf)
10492                                                 break;
10493                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10494                                         pr_warn("Line length is too long: Should be less than %d\n",
10495                                                 WRITE_BUFSIZE - 2);
10496                                         ret = -EINVAL;
10497                                         goto out;
10498                                 }
10499                         }
10500                         done += size;
10501
10502                         /* Remove comments */
10503                         tmp = strchr(buf, '#');
10504
10505                         if (tmp)
10506                                 *tmp = '\0';
10507
10508                         ret = createfn(buf);
10509                         if (ret)
10510                                 goto out;
10511                         buf += size;
10512
10513                 } while (done < count);
10514         }
10515         ret = done;
10516
10517 out:
10518         kfree(kbuf);
10519
10520         return ret;
10521 }
10522
10523 #ifdef CONFIG_TRACER_MAX_TRACE
10524 __init static bool tr_needs_alloc_snapshot(const char *name)
10525 {
10526         char *test;
10527         int len = strlen(name);
10528         bool ret;
10529
10530         if (!boot_snapshot_index)
10531                 return false;
10532
10533         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10534             boot_snapshot_info[len] == '\t')
10535                 return true;
10536
10537         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10538         if (!test)
10539                 return false;
10540
10541         sprintf(test, "\t%s\t", name);
10542         ret = strstr(boot_snapshot_info, test) == NULL;
10543         kfree(test);
10544         return ret;
10545 }
10546
10547 __init static void do_allocate_snapshot(const char *name)
10548 {
10549         if (!tr_needs_alloc_snapshot(name))
10550                 return;
10551
10552         /*
10553          * When allocate_snapshot is set, the next call to
10554          * allocate_trace_buffers() (called by trace_array_get_by_name())
10555          * will allocate the snapshot buffer. That will alse clear
10556          * this flag.
10557          */
10558         allocate_snapshot = true;
10559 }
10560 #else
10561 static inline void do_allocate_snapshot(const char *name) { }
10562 #endif
10563
10564 __init static void enable_instances(void)
10565 {
10566         struct trace_array *tr;
10567         char *curr_str;
10568         char *str;
10569         char *tok;
10570
10571         /* A tab is always appended */
10572         boot_instance_info[boot_instance_index - 1] = '\0';
10573         str = boot_instance_info;
10574
10575         while ((curr_str = strsep(&str, "\t"))) {
10576
10577                 tok = strsep(&curr_str, ",");
10578
10579                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10580                         do_allocate_snapshot(tok);
10581
10582                 tr = trace_array_get_by_name(tok, NULL);
10583                 if (!tr) {
10584                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10585                         continue;
10586                 }
10587                 /* Allow user space to delete it */
10588                 trace_array_put(tr);
10589
10590                 while ((tok = strsep(&curr_str, ","))) {
10591                         early_enable_events(tr, tok, true);
10592                 }
10593         }
10594 }
10595
10596 __init static int tracer_alloc_buffers(void)
10597 {
10598         int ring_buf_size;
10599         int ret = -ENOMEM;
10600
10601
10602         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10603                 pr_warn("Tracing disabled due to lockdown\n");
10604                 return -EPERM;
10605         }
10606
10607         /*
10608          * Make sure we don't accidentally add more trace options
10609          * than we have bits for.
10610          */
10611         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10612
10613         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10614                 goto out;
10615
10616         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10617                 goto out_free_buffer_mask;
10618
10619         /* Only allocate trace_printk buffers if a trace_printk exists */
10620         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10621                 /* Must be called before global_trace.buffer is allocated */
10622                 trace_printk_init_buffers();
10623
10624         /* To save memory, keep the ring buffer size to its minimum */
10625         if (global_trace.ring_buffer_expanded)
10626                 ring_buf_size = trace_buf_size;
10627         else
10628                 ring_buf_size = 1;
10629
10630         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10631         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10632
10633         raw_spin_lock_init(&global_trace.start_lock);
10634
10635         /*
10636          * The prepare callbacks allocates some memory for the ring buffer. We
10637          * don't free the buffer if the CPU goes down. If we were to free
10638          * the buffer, then the user would lose any trace that was in the
10639          * buffer. The memory will be removed once the "instance" is removed.
10640          */
10641         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10642                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10643                                       NULL);
10644         if (ret < 0)
10645                 goto out_free_cpumask;
10646         /* Used for event triggers */
10647         ret = -ENOMEM;
10648         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10649         if (!temp_buffer)
10650                 goto out_rm_hp_state;
10651
10652         if (trace_create_savedcmd() < 0)
10653                 goto out_free_temp_buffer;
10654
10655         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10656                 goto out_free_savedcmd;
10657
10658         /* TODO: make the number of buffers hot pluggable with CPUS */
10659         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10660                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10661                 goto out_free_pipe_cpumask;
10662         }
10663         if (global_trace.buffer_disabled)
10664                 tracing_off();
10665
10666         if (trace_boot_clock) {
10667                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10668                 if (ret < 0)
10669                         pr_warn("Trace clock %s not defined, going back to default\n",
10670                                 trace_boot_clock);
10671         }
10672
10673         /*
10674          * register_tracer() might reference current_trace, so it
10675          * needs to be set before we register anything. This is
10676          * just a bootstrap of current_trace anyway.
10677          */
10678         global_trace.current_trace = &nop_trace;
10679
10680         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10681
10682         ftrace_init_global_array_ops(&global_trace);
10683
10684         init_trace_flags_index(&global_trace);
10685
10686         register_tracer(&nop_trace);
10687
10688         /* Function tracing may start here (via kernel command line) */
10689         init_function_trace();
10690
10691         /* All seems OK, enable tracing */
10692         tracing_disabled = 0;
10693
10694         atomic_notifier_chain_register(&panic_notifier_list,
10695                                        &trace_panic_notifier);
10696
10697         register_die_notifier(&trace_die_notifier);
10698
10699         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10700
10701         INIT_LIST_HEAD(&global_trace.systems);
10702         INIT_LIST_HEAD(&global_trace.events);
10703         INIT_LIST_HEAD(&global_trace.hist_vars);
10704         INIT_LIST_HEAD(&global_trace.err_log);
10705         list_add(&global_trace.list, &ftrace_trace_arrays);
10706
10707         apply_trace_boot_options();
10708
10709         register_snapshot_cmd();
10710
10711         test_can_verify();
10712
10713         return 0;
10714
10715 out_free_pipe_cpumask:
10716         free_cpumask_var(global_trace.pipe_cpumask);
10717 out_free_savedcmd:
10718         free_saved_cmdlines_buffer(savedcmd);
10719 out_free_temp_buffer:
10720         ring_buffer_free(temp_buffer);
10721 out_rm_hp_state:
10722         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10723 out_free_cpumask:
10724         free_cpumask_var(global_trace.tracing_cpumask);
10725 out_free_buffer_mask:
10726         free_cpumask_var(tracing_buffer_mask);
10727 out:
10728         return ret;
10729 }
10730
10731 void __init ftrace_boot_snapshot(void)
10732 {
10733 #ifdef CONFIG_TRACER_MAX_TRACE
10734         struct trace_array *tr;
10735
10736         if (!snapshot_at_boot)
10737                 return;
10738
10739         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10740                 if (!tr->allocated_snapshot)
10741                         continue;
10742
10743                 tracing_snapshot_instance(tr);
10744                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10745         }
10746 #endif
10747 }
10748
10749 void __init early_trace_init(void)
10750 {
10751         if (tracepoint_printk) {
10752                 tracepoint_print_iter =
10753                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10754                 if (MEM_FAIL(!tracepoint_print_iter,
10755                              "Failed to allocate trace iterator\n"))
10756                         tracepoint_printk = 0;
10757                 else
10758                         static_key_enable(&tracepoint_printk_key.key);
10759         }
10760         tracer_alloc_buffers();
10761
10762         init_events();
10763 }
10764
10765 void __init trace_init(void)
10766 {
10767         trace_event_init();
10768
10769         if (boot_instance_index)
10770                 enable_instances();
10771 }
10772
10773 __init static void clear_boot_tracer(void)
10774 {
10775         /*
10776          * The default tracer at boot buffer is an init section.
10777          * This function is called in lateinit. If we did not
10778          * find the boot tracer, then clear it out, to prevent
10779          * later registration from accessing the buffer that is
10780          * about to be freed.
10781          */
10782         if (!default_bootup_tracer)
10783                 return;
10784
10785         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10786                default_bootup_tracer);
10787         default_bootup_tracer = NULL;
10788 }
10789
10790 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10791 __init static void tracing_set_default_clock(void)
10792 {
10793         /* sched_clock_stable() is determined in late_initcall */
10794         if (!trace_boot_clock && !sched_clock_stable()) {
10795                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10796                         pr_warn("Can not set tracing clock due to lockdown\n");
10797                         return;
10798                 }
10799
10800                 printk(KERN_WARNING
10801                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10802                        "If you want to keep using the local clock, then add:\n"
10803                        "  \"trace_clock=local\"\n"
10804                        "on the kernel command line\n");
10805                 tracing_set_clock(&global_trace, "global");
10806         }
10807 }
10808 #else
10809 static inline void tracing_set_default_clock(void) { }
10810 #endif
10811
10812 __init static int late_trace_init(void)
10813 {
10814         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10815                 static_key_disable(&tracepoint_printk_key.key);
10816                 tracepoint_printk = 0;
10817         }
10818
10819         tracing_set_default_clock();
10820         clear_boot_tracer();
10821         return 0;
10822 }
10823
10824 late_initcall_sync(late_trace_init);