GNU Linux-libre 5.10.217-gnu1 - kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could
67  * occur at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned long flags, int pc);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187
188 static int __init set_cmdline_ftrace(char *str)
189 {
190         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191         default_bootup_tracer = bootup_tracer_buf;
192         /* We are using ftrace early, expand it */
193         ring_buffer_expanded = true;
194         return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200         if (*str++ != '=' || !*str) {
201                 ftrace_dump_on_oops = DUMP_ALL;
202                 return 1;
203         }
204
205         if (!strcmp("orig_cpu", str)) {
206                 ftrace_dump_on_oops = DUMP_ORIG;
207                 return 1;
208         }
209
210         return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213
214 static int __init stop_trace_on_warning(char *str)
215 {
216         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217                 __disable_trace_on_warning = 1;
218         return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221
222 static int __init boot_alloc_snapshot(char *str)
223 {
224         allocate_snapshot = true;
225         /* We also need the main ring buffer expanded */
226         ring_buffer_expanded = true;
227         return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230
231
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233
234 static int __init set_trace_boot_options(char *str)
235 {
236         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237         return 1;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243
244 static int __init set_trace_boot_clock(char *str)
245 {
246         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247         trace_boot_clock = trace_boot_clock_buf;
248         return 1;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251
252 static int __init set_tracepoint_printk(char *str)
253 {
254         /* Ignore the "tp_printk_stop_on_boot" param */
255         if (*str == '_')
256                 return 0;
257
258         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
259                 tracepoint_printk = 1;
260         return 1;
261 }
262 __setup("tp_printk", set_tracepoint_printk);
263
264 unsigned long long ns2usecs(u64 nsec)
265 {
266         nsec += 500;
267         do_div(nsec, 1000);
268         return nsec;
269 }
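
/*
 * The "+ 500" above makes the integer division round to the nearest
 * microsecond instead of truncating: for example, 1499 ns becomes 1 us,
 * while 1500 ns becomes 2 us.
 */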
270
271 static void
272 trace_process_export(struct trace_export *export,
273                struct ring_buffer_event *event, int flag)
274 {
275         struct trace_entry *entry;
276         unsigned int size = 0;
277
278         if (export->flags & flag) {
279                 entry = ring_buffer_event_data(event);
280                 size = ring_buffer_event_length(event);
281                 export->write(export, entry, size);
282         }
283 }
284
285 static DEFINE_MUTEX(ftrace_export_lock);
286
287 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
288
289 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
290 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
291 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
292
293 static inline void ftrace_exports_enable(struct trace_export *export)
294 {
295         if (export->flags & TRACE_EXPORT_FUNCTION)
296                 static_branch_inc(&trace_function_exports_enabled);
297
298         if (export->flags & TRACE_EXPORT_EVENT)
299                 static_branch_inc(&trace_event_exports_enabled);
300
301         if (export->flags & TRACE_EXPORT_MARKER)
302                 static_branch_inc(&trace_marker_exports_enabled);
303 }
304
305 static inline void ftrace_exports_disable(struct trace_export *export)
306 {
307         if (export->flags & TRACE_EXPORT_FUNCTION)
308                 static_branch_dec(&trace_function_exports_enabled);
309
310         if (export->flags & TRACE_EXPORT_EVENT)
311                 static_branch_dec(&trace_event_exports_enabled);
312
313         if (export->flags & TRACE_EXPORT_MARKER)
314                 static_branch_dec(&trace_marker_exports_enabled);
315 }
316
317 static void ftrace_exports(struct ring_buffer_event *event, int flag)
318 {
319         struct trace_export *export;
320
321         preempt_disable_notrace();
322
323         export = rcu_dereference_raw_check(ftrace_exports_list);
324         while (export) {
325                 trace_process_export(export, event, flag);
326                 export = rcu_dereference_raw_check(export->next);
327         }
328
329         preempt_enable_notrace();
330 }
331
332 static inline void
333 add_trace_export(struct trace_export **list, struct trace_export *export)
334 {
335         rcu_assign_pointer(export->next, *list);
336         /*
337          * We are entering export into the list but another
338          * CPU might be walking that list. We need to make sure
339          * the export->next pointer is valid before another CPU sees
340          * the export pointer included into the list.
341          */
342         rcu_assign_pointer(*list, export);
343 }
344
345 static inline int
346 rm_trace_export(struct trace_export **list, struct trace_export *export)
347 {
348         struct trace_export **p;
349
350         for (p = list; *p != NULL; p = &(*p)->next)
351                 if (*p == export)
352                         break;
353
354         if (*p != export)
355                 return -1;
356
357         rcu_assign_pointer(*p, (*p)->next);
358
359         return 0;
360 }
361
362 static inline void
363 add_ftrace_export(struct trace_export **list, struct trace_export *export)
364 {
365         ftrace_exports_enable(export);
366
367         add_trace_export(list, export);
368 }
369
370 static inline int
371 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373         int ret;
374
375         ret = rm_trace_export(list, export);
376         ftrace_exports_disable(export);
377
378         return ret;
379 }
380
381 int register_ftrace_export(struct trace_export *export)
382 {
383         if (WARN_ON_ONCE(!export->write))
384                 return -1;
385
386         mutex_lock(&ftrace_export_lock);
387
388         add_ftrace_export(&ftrace_exports_list, export);
389
390         mutex_unlock(&ftrace_export_lock);
391
392         return 0;
393 }
394 EXPORT_SYMBOL_GPL(register_ftrace_export);
395
396 int unregister_ftrace_export(struct trace_export *export)
397 {
398         int ret;
399
400         mutex_lock(&ftrace_export_lock);
401
402         ret = rm_ftrace_export(&ftrace_exports_list, export);
403
404         mutex_unlock(&ftrace_export_lock);
405
406         return ret;
407 }
408 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
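
/*
 * A minimal usage sketch (hypothetical exporter, not part of this file):
 * an exporter supplies a write() callback plus flags selecting which
 * record types it wants, and brackets its lifetime with the register/
 * unregister calls above:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward "size" bytes at "entry" to an out-of-band sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */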
409
410 /* trace_flags holds trace_options default values */
411 #define TRACE_DEFAULT_FLAGS                                             \
412         (FUNCTION_DEFAULT_FLAGS |                                       \
413          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
414          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
415          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
416          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
417
418 /* trace_options that are only supported by global_trace */
419 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
420                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
421
422 /* trace_flags that are default zero for instances */
423 #define ZEROED_TRACE_FLAGS \
424         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
425
426 /*
427  * The global_trace is the descriptor that holds the top-level tracing
428  * buffers for the live tracing.
429  */
430 static struct trace_array global_trace = {
431         .trace_flags = TRACE_DEFAULT_FLAGS,
432 };
433
434 LIST_HEAD(ftrace_trace_arrays);
435
436 int trace_array_get(struct trace_array *this_tr)
437 {
438         struct trace_array *tr;
439         int ret = -ENODEV;
440
441         mutex_lock(&trace_types_lock);
442         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
443                 if (tr == this_tr) {
444                         tr->ref++;
445                         ret = 0;
446                         break;
447                 }
448         }
449         mutex_unlock(&trace_types_lock);
450
451         return ret;
452 }
453
454 static void __trace_array_put(struct trace_array *this_tr)
455 {
456         WARN_ON(!this_tr->ref);
457         this_tr->ref--;
458 }
459
460 /**
461  * trace_array_put - Decrement the reference counter for this trace array.
462  *
463  * NOTE: Use this when we no longer need the trace array returned by
464  * trace_array_get_by_name(). This ensures the trace array can be later
465  * destroyed.
466  *
467  */
468 void trace_array_put(struct trace_array *this_tr)
469 {
470         if (!this_tr)
471                 return;
472
473         mutex_lock(&trace_types_lock);
474         __trace_array_put(this_tr);
475         mutex_unlock(&trace_types_lock);
476 }
477 EXPORT_SYMBOL_GPL(trace_array_put);
478
479 int tracing_check_open_get_tr(struct trace_array *tr)
480 {
481         int ret;
482
483         ret = security_locked_down(LOCKDOWN_TRACEFS);
484         if (ret)
485                 return ret;
486
487         if (tracing_disabled)
488                 return -ENODEV;
489
490         if (tr && trace_array_get(tr) < 0)
491                 return -ENODEV;
492
493         return 0;
494 }
495
496 int call_filter_check_discard(struct trace_event_call *call, void *rec,
497                               struct trace_buffer *buffer,
498                               struct ring_buffer_event *event)
499 {
500         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
501             !filter_match_preds(call->filter, rec)) {
502                 __trace_event_discard_commit(buffer, event);
503                 return 1;
504         }
505
506         return 0;
507 }
508
509 void trace_free_pid_list(struct trace_pid_list *pid_list)
510 {
511         vfree(pid_list->pids);
512         kfree(pid_list);
513 }
514
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525         /*
526          * If pid_max changed after filtered_pids was created, we
527          * by default ignore all pids greater than the previous pid_max.
528          */
529         if (search_pid >= filtered_pids->pid_max)
530                 return false;
531
532         return test_bit(search_pid, filtered_pids->pids);
533 }
534
535 /**
536  * trace_ignore_this_task - should a task be ignored for tracing
537  * @filtered_pids: The list of pids to check
538  * @task: The task that should be ignored if not filtered
539  *
540  * Checks if @task should be traced or not from @filtered_pids.
541  * Returns true if @task should *NOT* be traced.
542  * Returns false if @task should be traced.
543  */
544 bool
545 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546                        struct trace_pid_list *filtered_no_pids,
547                        struct task_struct *task)
548 {
549         /*
550          * If filtered_no_pids is not empty, and the task's pid is listed
551          * in filtered_no_pids, then return true.
552          * Otherwise, if filtered_pids is empty, that means we can
553          * trace all tasks. If it has content, then only trace pids
554          * within filtered_pids.
555          */
556
557         return (filtered_pids &&
558                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
559                 (filtered_no_pids &&
560                  trace_find_filtered_pid(filtered_no_pids, task->pid));
561 }
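
/*
 * For example (sketching the rules above): with filtered_pids = {42} and
 * filtered_no_pids empty, only pid 42 is traced; with filtered_pids empty
 * and filtered_no_pids = {42}, every task except pid 42 is traced.
 */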
562
563 /**
564  * trace_filter_add_remove_task - Add or remove a task from a pid_list
565  * @pid_list: The list to modify
566  * @self: The current task for fork or NULL for exit
567  * @task: The task to add or remove
568  *
569  * If adding a task, if @self is defined, the task is only added if @self
570  * is also included in @pid_list. This happens on fork and tasks should
571  * only be added when the parent is listed. If @self is NULL, then the
572  * @task pid will be removed from the list, which would happen on exit
573  * of a task.
574  */
575 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
576                                   struct task_struct *self,
577                                   struct task_struct *task)
578 {
579         if (!pid_list)
580                 return;
581
582         /* For forks, we only add if the forking task is listed */
583         if (self) {
584                 if (!trace_find_filtered_pid(pid_list, self->pid))
585                         return;
586         }
587
588         /* Sorry, but we don't support pid_max changing after setting */
589         if (task->pid >= pid_list->pid_max)
590                 return;
591
592         /* "self" is set for forks, and NULL for exits */
593         if (self)
594                 set_bit(task->pid, pid_list->pids);
595         else
596                 clear_bit(task->pid, pid_list->pids);
597 }
598
599 /**
600  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
601  * @pid_list: The pid list to show
602  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
603  * @pos: The position of the file
604  *
605  * This is used by the seq_file "next" operation to iterate the pids
606  * listed in a trace_pid_list structure.
607  *
608  * Returns the pid+1 as we want to display pid of zero, but NULL would
609  * stop the iteration.
610  */
611 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
612 {
613         unsigned long pid = (unsigned long)v;
614
615         (*pos)++;
616
617         /* pid already is +1 of the actual previous bit */
618         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
619
620         /* Return pid + 1 to allow zero to be represented */
621         if (pid < pid_list->pid_max)
622                 return (void *)(pid + 1);
623
624         return NULL;
625 }
626
627 /**
628  * trace_pid_start - Used for seq_file to start reading pid lists
629  * @pid_list: The pid list to show
630  * @pos: The position of the file
631  *
632  * This is used by seq_file "start" operation to start the iteration
633  * of listing pids.
634  *
635  * Returns the pid+1 as we want to display pid of zero, but NULL would
636  * stop the iteration.
637  */
638 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
639 {
640         unsigned long pid;
641         loff_t l = 0;
642
643         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
644         if (pid >= pid_list->pid_max)
645                 return NULL;
646
647         /* Return pid + 1 so that zero can be the exit value */
648         for (pid++; pid && l < *pos;
649              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
650                 ;
651         return (void *)pid;
652 }
653
654 /**
655  * trace_pid_show - show the current pid in seq_file processing
656  * @m: The seq_file structure to write into
657  * @v: A void pointer of the pid (+1) value to display
658  *
659  * Can be directly used by seq_file operations to display the current
660  * pid value.
661  */
662 int trace_pid_show(struct seq_file *m, void *v)
663 {
664         unsigned long pid = (unsigned long)v - 1;
665
666         seq_printf(m, "%lu\n", pid);
667         return 0;
668 }
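
/*
 * The three helpers above are meant to back the seq_file operations of
 * the pid-filter files (e.g. set_event_pid). A sketch, with hypothetical
 * wrappers that look up the file's own trace_pid_list under the proper
 * locking:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// hypothetical; typically drops RCU
 *		.show	= trace_pid_show,
 *	};
 */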
669
670 /* 128 should be much more than enough */
671 #define PID_BUF_SIZE            127
672
673 int trace_pid_write(struct trace_pid_list *filtered_pids,
674                     struct trace_pid_list **new_pid_list,
675                     const char __user *ubuf, size_t cnt)
676 {
677         struct trace_pid_list *pid_list;
678         struct trace_parser parser;
679         unsigned long val;
680         int nr_pids = 0;
681         ssize_t read = 0;
682         ssize_t ret = 0;
683         loff_t pos;
684         pid_t pid;
685
686         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
687                 return -ENOMEM;
688
689         /*
690          * Always create a new array. The write is an all or nothing
691          * operation: a new array is built whenever the user adds new
692          * pids, so that if the operation fails, the current list is
693          * not modified.
694          */
695         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
696         if (!pid_list) {
697                 trace_parser_put(&parser);
698                 return -ENOMEM;
699         }
700
701         pid_list->pid_max = READ_ONCE(pid_max);
702
703         /* Only truncating will shrink pid_max */
704         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
705                 pid_list->pid_max = filtered_pids->pid_max;
706
707         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
708         if (!pid_list->pids) {
709                 trace_parser_put(&parser);
710                 kfree(pid_list);
711                 return -ENOMEM;
712         }
713
714         if (filtered_pids) {
715                 /* copy the current bits to the new max */
716                 for_each_set_bit(pid, filtered_pids->pids,
717                                  filtered_pids->pid_max) {
718                         set_bit(pid, pid_list->pids);
719                         nr_pids++;
720                 }
721         }
722
723         while (cnt > 0) {
724
725                 pos = 0;
726
727                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
728                 if (ret < 0 || !trace_parser_loaded(&parser))
729                         break;
730
731                 read += ret;
732                 ubuf += ret;
733                 cnt -= ret;
734
735                 ret = -EINVAL;
736                 if (kstrtoul(parser.buffer, 0, &val))
737                         break;
738                 if (val >= pid_list->pid_max)
739                         break;
740
741                 pid = (pid_t)val;
742
743                 set_bit(pid, pid_list->pids);
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_free_pid_list(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_free_pid_list(pid_list);
759                 read = ret;
760                 pid_list = NULL;
761         }
762
763         *new_pid_list = pid_list;
764
765         return read;
766 }
767
768 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
769 {
770         u64 ts;
771
772         /* Early boot up does not have a buffer yet */
773         if (!buf->buffer)
774                 return trace_clock_local();
775
776         ts = ring_buffer_time_stamp(buf->buffer, cpu);
777         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
778
779         return ts;
780 }
781
782 u64 ftrace_now(int cpu)
783 {
784         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
785 }
786
787 /**
788  * tracing_is_enabled - Show if tracing in global_trace is enabled
789  *
790  * Shows if the global trace has been enabled or not. It uses the
791  * mirror flag "buffer_disabled" to be used in fast paths such as for
792  * the irqsoff tracer. But it may be inaccurate due to races. If you
793  * need to know the accurate state, use tracing_is_on() which is a little
794  * slower, but accurate.
795  */
796 int tracing_is_enabled(void)
797 {
798         /*
799          * For quick access (irqsoff uses this in fast path), just
800          * return the mirror variable of the state of the ring buffer.
801          * It's a little racy, but we don't really care.
802          */
803         smp_rmb();
804         return !global_trace.buffer_disabled;
805 }
806
807 /*
808  * trace_buf_size is the size in bytes that is allocated
809  * for a buffer. Note, the number of bytes is always rounded
810  * to page size.
811  *
812  * This number is purposely set to a low number of 16384.
813  * If a dump on oops happens, it is much appreciated not to have to
814  * wait for all that output. In any case, this is configurable at
815  * both boot time and run time.
816  */
817 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
818
819 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
820
821 /* trace_types holds a link list of available tracers. */
822 static struct tracer            *trace_types __read_mostly;
823
824 /*
825  * trace_types_lock is used to protect the trace_types list.
826  */
827 DEFINE_MUTEX(trace_types_lock);
828
829 /*
830  * serialize access to the ring buffer
831  *
832  * The ring buffer serializes readers, but that is only low level protection.
833  * The validity of the events (returned by ring_buffer_peek() etc.)
834  * is not protected by the ring buffer.
835  *
836  * The content of events may become garbage if we allow other processes to
837  * consume these events concurrently:
838  *   A) the page of the consumed events may become a normal page
839  *      (not a reader page) in the ring buffer, and this page will be
840  *      rewritten by the events producer.
841  *   B) the page of the consumed events may become a page for splice_read,
842  *      and this page will be returned to the system.
843  *
844  * These primitives allow multiple processes to access different cpu ring
845  * buffers concurrently.
846  *
847  * These primitives don't distinguish read-only and read-consume access.
848  * Multiple read-only accesses are also serialized.
849  */
850
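/*
 * A typical reader path therefore brackets its buffer accesses like this
 * (sketch; "iter" stands for an already set up struct trace_iterator):
 *
 *	trace_access_lock(iter->cpu_file);
 *	... ring_buffer_peek()/ring_buffer_consume() on that cpu ...
 *	trace_access_unlock(iter->cpu_file);
 */
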
851 #ifdef CONFIG_SMP
852 static DECLARE_RWSEM(all_cpu_access_lock);
853 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
854
855 static inline void trace_access_lock(int cpu)
856 {
857         if (cpu == RING_BUFFER_ALL_CPUS) {
858                 /* gain it for accessing the whole ring buffer. */
859                 down_write(&all_cpu_access_lock);
860         } else {
861                 /* gain it for accessing a cpu ring buffer. */
862
863                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
864                 down_read(&all_cpu_access_lock);
865
866                 /* Secondly block other access to this @cpu ring buffer. */
867                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
868         }
869 }
870
871 static inline void trace_access_unlock(int cpu)
872 {
873         if (cpu == RING_BUFFER_ALL_CPUS) {
874                 up_write(&all_cpu_access_lock);
875         } else {
876                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
877                 up_read(&all_cpu_access_lock);
878         }
879 }
880
881 static inline void trace_access_lock_init(void)
882 {
883         int cpu;
884
885         for_each_possible_cpu(cpu)
886                 mutex_init(&per_cpu(cpu_access_lock, cpu));
887 }
888
889 #else
890
891 static DEFINE_MUTEX(access_lock);
892
893 static inline void trace_access_lock(int cpu)
894 {
895         (void)cpu;
896         mutex_lock(&access_lock);
897 }
898
899 static inline void trace_access_unlock(int cpu)
900 {
901         (void)cpu;
902         mutex_unlock(&access_lock);
903 }
904
905 static inline void trace_access_lock_init(void)
906 {
907 }
908
909 #endif
910
911 #ifdef CONFIG_STACKTRACE
912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
913                                  unsigned long flags,
914                                  int skip, int pc, struct pt_regs *regs);
915 static inline void ftrace_trace_stack(struct trace_array *tr,
916                                       struct trace_buffer *buffer,
917                                       unsigned long flags,
918                                       int skip, int pc, struct pt_regs *regs);
919
920 #else
921 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
922                                         unsigned long flags,
923                                         int skip, int pc, struct pt_regs *regs)
924 {
925 }
926 static inline void ftrace_trace_stack(struct trace_array *tr,
927                                       struct trace_buffer *buffer,
928                                       unsigned long flags,
929                                       int skip, int pc, struct pt_regs *regs)
930 {
931 }
932
933 #endif
934
935 static __always_inline void
936 trace_event_setup(struct ring_buffer_event *event,
937                   int type, unsigned long flags, int pc)
938 {
939         struct trace_entry *ent = ring_buffer_event_data(event);
940
941         tracing_generic_entry_update(ent, type, flags, pc);
942 }
943
944 static __always_inline struct ring_buffer_event *
945 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
946                           int type,
947                           unsigned long len,
948                           unsigned long flags, int pc)
949 {
950         struct ring_buffer_event *event;
951
952         event = ring_buffer_lock_reserve(buffer, len);
953         if (event != NULL)
954                 trace_event_setup(event, type, flags, pc);
955
956         return event;
957 }
958
959 void tracer_tracing_on(struct trace_array *tr)
960 {
961         if (tr->array_buffer.buffer)
962                 ring_buffer_record_on(tr->array_buffer.buffer);
963         /*
964          * This flag is looked at when buffers haven't been allocated
965          * yet, or by some tracers (like irqsoff), that just want to
966          * know if the ring buffer has been disabled, but it can handle
967          * races of where it gets disabled but we still do a record.
968          * As the check is in the fast path of the tracers, it is more
969          * important to be fast than accurate.
970          */
971         tr->buffer_disabled = 0;
972         /* Make the flag seen by readers */
973         smp_wmb();
974 }
975
976 /**
977  * tracing_on - enable tracing buffers
978  *
979  * This function enables tracing buffers that may have been
980  * disabled with tracing_off.
981  */
982 void tracing_on(void)
983 {
984         tracer_tracing_on(&global_trace);
985 }
986 EXPORT_SYMBOL_GPL(tracing_on);
987
988
989 static __always_inline void
990 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
991 {
992         __this_cpu_write(trace_taskinfo_save, true);
993
994         /* If this is the temp buffer, we need to commit fully */
995         if (this_cpu_read(trace_buffered_event) == event) {
996                 /* Length is in event->array[0] */
997                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
998                 /* Release the temp buffer */
999                 this_cpu_dec(trace_buffered_event_cnt);
1000         } else
1001                 ring_buffer_unlock_commit(buffer, event);
1002 }
1003
1004 /**
1005  * __trace_puts - write a constant string into the trace buffer.
1006  * @ip:    The address of the caller
1007  * @str:   The constant string to write
1008  * @size:  The size of the string.
1009  */
1010 int __trace_puts(unsigned long ip, const char *str, int size)
1011 {
1012         struct ring_buffer_event *event;
1013         struct trace_buffer *buffer;
1014         struct print_entry *entry;
1015         unsigned long irq_flags;
1016         int alloc;
1017         int pc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         pc = preempt_count();
1023
1024         if (unlikely(tracing_selftest_running || tracing_disabled))
1025                 return 0;
1026
1027         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1028
1029         local_save_flags(irq_flags);
1030         buffer = global_trace.array_buffer.buffer;
1031         ring_buffer_nest_start(buffer);
1032         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
1033                                             irq_flags, pc);
1034         if (!event) {
1035                 size = 0;
1036                 goto out;
1037         }
1038
1039         entry = ring_buffer_event_data(event);
1040         entry->ip = ip;
1041
1042         memcpy(&entry->buf, str, size);
1043
1044         /* Add a newline if necessary */
1045         if (entry->buf[size - 1] != '\n') {
1046                 entry->buf[size] = '\n';
1047                 entry->buf[size + 1] = '\0';
1048         } else
1049                 entry->buf[size] = '\0';
1050
1051         __buffer_unlock_commit(buffer, event);
1052         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1053  out:
1054         ring_buffer_nest_end(buffer);
1055         return size;
1056 }
1057 EXPORT_SYMBOL_GPL(__trace_puts);
1058
1059 /**
1060  * __trace_bputs - write the pointer to a constant string into trace buffer
1061  * @ip:    The address of the caller
1062  * @str:   The constant string to write to the buffer to
1063  */
1064 int __trace_bputs(unsigned long ip, const char *str)
1065 {
1066         struct ring_buffer_event *event;
1067         struct trace_buffer *buffer;
1068         struct bputs_entry *entry;
1069         unsigned long irq_flags;
1070         int size = sizeof(struct bputs_entry);
1071         int ret = 0;
1072         int pc;
1073
1074         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1075                 return 0;
1076
1077         pc = preempt_count();
1078
1079         if (unlikely(tracing_selftest_running || tracing_disabled))
1080                 return 0;
1081
1082         local_save_flags(irq_flags);
1083         buffer = global_trace.array_buffer.buffer;
1084
1085         ring_buffer_nest_start(buffer);
1086         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1087                                             irq_flags, pc);
1088         if (!event)
1089                 goto out;
1090
1091         entry = ring_buffer_event_data(event);
1092         entry->ip                       = ip;
1093         entry->str                      = str;
1094
1095         __buffer_unlock_commit(buffer, event);
1096         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1097
1098         ret = 1;
1099  out:
1100         ring_buffer_nest_end(buffer);
1101         return ret;
1102 }
1103 EXPORT_SYMBOL_GPL(__trace_bputs);
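
/*
 * Note: callers do not normally use the two helpers above directly; the
 * trace_puts() macro picks __trace_bputs() for build-time constant
 * strings and __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */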
1104
1105 #ifdef CONFIG_TRACER_SNAPSHOT
1106 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107                                            void *cond_data)
1108 {
1109         struct tracer *tracer = tr->current_trace;
1110         unsigned long flags;
1111
1112         if (in_nmi()) {
1113                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1114                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1115                 return;
1116         }
1117
1118         if (!tr->allocated_snapshot) {
1119                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1120                 internal_trace_puts("*** stopping trace here!   ***\n");
1121                 tracing_off();
1122                 return;
1123         }
1124
1125         /* Note, snapshot can not be used when the tracer uses it */
1126         if (tracer->use_max_tr) {
1127                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1128                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1129                 return;
1130         }
1131
1132         local_irq_save(flags);
1133         update_max_tr(tr, current, smp_processor_id(), cond_data);
1134         local_irq_restore(flags);
1135 }
1136
1137 void tracing_snapshot_instance(struct trace_array *tr)
1138 {
1139         tracing_snapshot_instance_cond(tr, NULL);
1140 }
1141
1142 /**
1143  * tracing_snapshot - take a snapshot of the current buffer.
1144  *
1145  * This causes a swap between the snapshot buffer and the current live
1146  * tracing buffer. You can use this to take snapshots of the live
1147  * trace when some condition is triggered, but continue to trace.
1148  *
1149  * Note, make sure to allocate the snapshot with either
1150  * tracing_snapshot_alloc(), or by doing it manually with:
1151  *      echo 1 > /sys/kernel/debug/tracing/snapshot
1152  *
1153  * If the snapshot buffer is not allocated, this will stop tracing,
1154  * basically making a permanent snapshot.
1155  */
1156 void tracing_snapshot(void)
1157 {
1158         struct trace_array *tr = &global_trace;
1159
1160         tracing_snapshot_instance(tr);
1161 }
1162 EXPORT_SYMBOL_GPL(tracing_snapshot);
1163
1164 /**
1165  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166  * @tr:         The tracing instance to snapshot
1167  * @cond_data:  The data to be tested conditionally, and possibly saved
1168  *
1169  * This is the same as tracing_snapshot() except that the snapshot is
1170  * conditional - the snapshot will only happen if the
1171  * cond_snapshot.update() implementation receiving the cond_data
1172  * returns true, which means that the trace array's cond_snapshot
1173  * update() operation used the cond_data to determine whether the
1174  * snapshot should be taken, and if it was, presumably saved it along
1175  * with the snapshot.
1176  */
1177 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178 {
1179         tracing_snapshot_instance_cond(tr, cond_data);
1180 }
1181 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182
1183 /**
1184  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185  * @tr:         The tracing instance
1186  *
1187  * When the user enables a conditional snapshot using
1188  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189  * with the snapshot.  This accessor is used to retrieve it.
1190  *
1191  * Should not be called from cond_snapshot.update(), since it takes
1192  * the tr->max_lock lock, which the code calling
1193  * cond_snapshot.update() has already done.
1194  *
1195  * Returns the cond_data associated with the trace array's snapshot.
1196  */
1197 void *tracing_cond_snapshot_data(struct trace_array *tr)
1198 {
1199         void *cond_data = NULL;
1200
1201         local_irq_disable();
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208         local_irq_enable();
1209
1210         return cond_data;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1213
1214 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1215                                         struct array_buffer *size_buf, int cpu_id);
1216 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1217
1218 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1219 {
1220         int ret;
1221
1222         if (!tr->allocated_snapshot) {
1223
1224                 /* allocate spare buffer */
1225                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1226                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1227                 if (ret < 0)
1228                         return ret;
1229
1230                 tr->allocated_snapshot = true;
1231         }
1232
1233         return 0;
1234 }
1235
1236 static void free_snapshot(struct trace_array *tr)
1237 {
1238         /*
1239          * We don't free the ring buffer. Instead, we resize it because
1240          * the max_tr ring buffer has some state (e.g. ring->clock) and
1241          * we want to preserve it.
1242          */
1243         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1244         set_buffer_entries(&tr->max_buffer, 1);
1245         tracing_reset_online_cpus(&tr->max_buffer);
1246         tr->allocated_snapshot = false;
1247 }
1248
1249 /**
1250  * tracing_alloc_snapshot - allocate snapshot buffer.
1251  *
1252  * This only allocates the snapshot buffer if it isn't already
1253  * allocated - it doesn't also take a snapshot.
1254  *
1255  * This is meant to be used in cases where the snapshot buffer needs
1256  * to be set up for events that can't sleep but need to be able to
1257  * trigger a snapshot.
1258  */
1259 int tracing_alloc_snapshot(void)
1260 {
1261         struct trace_array *tr = &global_trace;
1262         int ret;
1263
1264         ret = tracing_alloc_snapshot_instance(tr);
1265         WARN_ON(ret < 0);
1266
1267         return ret;
1268 }
1269 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1270
1271 /**
1272  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1273  *
1274  * This is similar to tracing_snapshot(), but it will allocate the
1275  * snapshot buffer if it isn't already allocated. Use this only
1276  * where it is safe to sleep, as the allocation may sleep.
1277  *
1278  * This causes a swap between the snapshot buffer and the current live
1279  * tracing buffer. You can use this to take snapshots of the live
1280  * trace when some condition is triggered, but continue to trace.
1281  */
1282 void tracing_snapshot_alloc(void)
1283 {
1284         int ret;
1285
1286         ret = tracing_alloc_snapshot();
1287         if (ret < 0)
1288                 return;
1289
1290         tracing_snapshot();
1291 }
1292 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
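
/*
 * Putting these together, a typical in-kernel user allocates the spare
 * buffer once from a context that may sleep and then snapshots at the
 * interesting moment (sketch; "interesting_condition" is hypothetical):
 *
 *	tracing_alloc_snapshot();	// from a sleepable context
 *	...
 *	if (interesting_condition)
 *		tracing_snapshot();	// swap live buffer <-> snapshot
 */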
1293
1294 /**
1295  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1296  * @tr:         The tracing instance
1297  * @cond_data:  User data to associate with the snapshot
1298  * @update:     Implementation of the cond_snapshot update function
1299  *
1300  * Check whether the conditional snapshot for the given instance has
1301  * already been enabled, or if the current tracer is already using a
1302  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1303  * save the cond_data and update function inside.
1304  *
1305  * Returns 0 if successful, error otherwise.
1306  */
1307 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1308                                  cond_update_fn_t update)
1309 {
1310         struct cond_snapshot *cond_snapshot;
1311         int ret = 0;
1312
1313         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1314         if (!cond_snapshot)
1315                 return -ENOMEM;
1316
1317         cond_snapshot->cond_data = cond_data;
1318         cond_snapshot->update = update;
1319
1320         mutex_lock(&trace_types_lock);
1321
1322         ret = tracing_alloc_snapshot_instance(tr);
1323         if (ret)
1324                 goto fail_unlock;
1325
1326         if (tr->current_trace->use_max_tr) {
1327                 ret = -EBUSY;
1328                 goto fail_unlock;
1329         }
1330
1331         /*
1332          * The cond_snapshot can only change to NULL without the
1333          * trace_types_lock. We don't care if we race with it going
1334          * to NULL, but we want to make sure that it's not set to
1335          * something other than NULL when we get here, which we can
1336          * do safely with only holding the trace_types_lock and not
1337          * having to take the max_lock.
1338          */
1339         if (tr->cond_snapshot) {
1340                 ret = -EBUSY;
1341                 goto fail_unlock;
1342         }
1343
1344         local_irq_disable();
1345         arch_spin_lock(&tr->max_lock);
1346         tr->cond_snapshot = cond_snapshot;
1347         arch_spin_unlock(&tr->max_lock);
1348         local_irq_enable();
1349
1350         mutex_unlock(&trace_types_lock);
1351
1352         return ret;
1353
1354  fail_unlock:
1355         mutex_unlock(&trace_types_lock);
1356         kfree(cond_snapshot);
1357         return ret;
1358 }
1359 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
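
/*
 * Sketch of conditional use ("my_state" and "my_update" are hypothetical):
 * the update() callback decides, based on the cond_data handed to
 * tracing_snapshot_cond(), whether the swap actually happens:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */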
1360
1361 /**
1362  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1363  * @tr:         The tracing instance
1364  *
1365  * Check whether the conditional snapshot for the given instance is
1366  * enabled; if so, free the cond_snapshot associated with it,
1367  * otherwise return -EINVAL.
1368  *
1369  * Returns 0 if successful, error otherwise.
1370  */
1371 int tracing_snapshot_cond_disable(struct trace_array *tr)
1372 {
1373         int ret = 0;
1374
1375         local_irq_disable();
1376         arch_spin_lock(&tr->max_lock);
1377
1378         if (!tr->cond_snapshot)
1379                 ret = -EINVAL;
1380         else {
1381                 kfree(tr->cond_snapshot);
1382                 tr->cond_snapshot = NULL;
1383         }
1384
1385         arch_spin_unlock(&tr->max_lock);
1386         local_irq_enable();
1387
1388         return ret;
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1391 #else
1392 void tracing_snapshot(void)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot);
1397 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1402 int tracing_alloc_snapshot(void)
1403 {
1404         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1405         return -ENODEV;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1408 void tracing_snapshot_alloc(void)
1409 {
1410         /* Give warning */
1411         tracing_snapshot();
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1414 void *tracing_cond_snapshot_data(struct trace_array *tr)
1415 {
1416         return NULL;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1419 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1420 {
1421         return -ENODEV;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1424 int tracing_snapshot_cond_disable(struct trace_array *tr)
1425 {
1426         return false;
1427 }
1428 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1429 #endif /* CONFIG_TRACER_SNAPSHOT */
1430
1431 void tracer_tracing_off(struct trace_array *tr)
1432 {
1433         if (tr->array_buffer.buffer)
1434                 ring_buffer_record_off(tr->array_buffer.buffer);
1435         /*
1436          * This flag is looked at when buffers haven't been allocated
1437          * yet, or by some tracers (like irqsoff), that just want to
1438          * know if the ring buffer has been disabled, but it can handle
1439          * races of where it gets disabled but we still do a record.
1440          * As the check is in the fast path of the tracers, it is more
1441          * important to be fast than accurate.
1442          */
1443         tr->buffer_disabled = 1;
1444         /* Make the flag seen by readers */
1445         smp_wmb();
1446 }
1447
1448 /**
1449  * tracing_off - turn off tracing buffers
1450  *
1451  * This function stops the tracing buffers from recording data.
1452  * It does not disable any overhead the tracers themselves may
1453  * be causing. This function simply causes all recording to
1454  * the ring buffers to fail.
1455  */
1456 void tracing_off(void)
1457 {
1458         tracer_tracing_off(&global_trace);
1459 }
1460 EXPORT_SYMBOL_GPL(tracing_off);
1461
1462 void disable_trace_on_warning(void)
1463 {
1464         if (__disable_trace_on_warning) {
1465                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1466                         "Disabling tracing due to warning\n");
1467                 tracing_off();
1468         }
1469 }
1470
1471 /**
1472  * tracer_tracing_is_on - show real state of the ring buffer
1473  * @tr: the trace array to check
1474  *
1475  * Shows the real state of the ring buffer: whether it is enabled or not.
1476  */
1477 bool tracer_tracing_is_on(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1481         return !tr->buffer_disabled;
1482 }
1483
1484 /**
1485  * tracing_is_on - show state of ring buffers enabled
1486  */
1487 int tracing_is_on(void)
1488 {
1489         return tracer_tracing_is_on(&global_trace);
1490 }
1491 EXPORT_SYMBOL_GPL(tracing_is_on);
1492
1493 static int __init set_buf_size(char *str)
1494 {
1495         unsigned long buf_size;
1496
1497         if (!str)
1498                 return 0;
1499         buf_size = memparse(str, &str);
1500         /*
1501          * nr_entries can not be zero and the startup
1502          * tests require some buffer space. Therefore
1503          * ensure we have at least 4096 bytes of buffer.
1504          */
1505         trace_buf_size = max(4096UL, buf_size);
1506         return 1;
1507 }
1508 __setup("trace_buf_size=", set_buf_size);
1509
1510 static int __init set_tracing_thresh(char *str)
1511 {
1512         unsigned long threshold;
1513         int ret;
1514
1515         if (!str)
1516                 return 0;
1517         ret = kstrtoul(str, 0, &threshold);
1518         if (ret < 0)
1519                 return 0;
1520         tracing_thresh = threshold * 1000;
1521         return 1;
1522 }
1523 __setup("tracing_thresh=", set_tracing_thresh);
1524
1525 unsigned long nsecs_to_usecs(unsigned long nsecs)
1526 {
1527         return nsecs / 1000;
1528 }
1529
1530 /*
1531  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1532  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1533  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1534  * of strings in the order that the evals (enum) were defined.
1535  */
1536 #undef C
1537 #define C(a, b) b
1538
1539 /* These must match the bit positions in trace_iterator_flags */
1540 static const char *trace_options[] = {
1541         TRACE_FLAGS
1542         NULL
1543 };
1544
1545 static struct {
1546         u64 (*func)(void);
1547         const char *name;
1548         int in_ns;              /* is this clock in nanoseconds? */
1549 } trace_clocks[] = {
1550         { trace_clock_local,            "local",        1 },
1551         { trace_clock_global,           "global",       1 },
1552         { trace_clock_counter,          "counter",      0 },
1553         { trace_clock_jiffies,          "uptime",       0 },
1554         { trace_clock,                  "perf",         1 },
1555         { ktime_get_mono_fast_ns,       "mono",         1 },
1556         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1557         { ktime_get_boot_fast_ns,       "boot",         1 },
1558         ARCH_TRACE_CLOCKS
1559 };
1560
1561 bool trace_clock_in_ns(struct trace_array *tr)
1562 {
1563         if (trace_clocks[tr->clock_id].in_ns)
1564                 return true;
1565
1566         return false;
1567 }
1568
1569 /*
1570  * trace_parser_get_init - gets the buffer for trace parser
1571  */
1572 int trace_parser_get_init(struct trace_parser *parser, int size)
1573 {
1574         memset(parser, 0, sizeof(*parser));
1575
1576         parser->buffer = kmalloc(size, GFP_KERNEL);
1577         if (!parser->buffer)
1578                 return 1;
1579
1580         parser->size = size;
1581         return 0;
1582 }
1583
1584 /*
1585  * trace_parser_put - frees the buffer for trace parser
1586  */
1587 void trace_parser_put(struct trace_parser *parser)
1588 {
1589         kfree(parser->buffer);
1590         parser->buffer = NULL;
1591 }
1592
1593 /*
1594  * trace_get_user - reads the user input string separated by space
1595  * (matched by isspace(ch))
1596  *
1597  * For each string found the 'struct trace_parser' is updated,
1598  * and the function returns.
1599  *
1600  * Returns number of bytes read.
1601  *
1602  * See kernel/trace/trace.h for 'struct trace_parser' details.
1603  */
1604 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1605         size_t cnt, loff_t *ppos)
1606 {
1607         char ch;
1608         size_t read = 0;
1609         ssize_t ret;
1610
1611         if (!*ppos)
1612                 trace_parser_clear(parser);
1613
1614         ret = get_user(ch, ubuf++);
1615         if (ret)
1616                 goto out;
1617
1618         read++;
1619         cnt--;
1620
1621         /*
1622          * If the parser is not finished with the last write,
1623          * continue reading the user input without skipping spaces.
1624          */
1625         if (!parser->cont) {
1626                 /* skip white space */
1627                 while (cnt && isspace(ch)) {
1628                         ret = get_user(ch, ubuf++);
1629                         if (ret)
1630                                 goto out;
1631                         read++;
1632                         cnt--;
1633                 }
1634
1635                 parser->idx = 0;
1636
1637                 /* only spaces were written */
1638                 if (isspace(ch) || !ch) {
1639                         *ppos += read;
1640                         ret = read;
1641                         goto out;
1642                 }
1643         }
1644
1645         /* read the non-space input */
1646         while (cnt && !isspace(ch) && ch) {
1647                 if (parser->idx < parser->size - 1)
1648                         parser->buffer[parser->idx++] = ch;
1649                 else {
1650                         ret = -EINVAL;
1651                         goto out;
1652                 }
1653                 ret = get_user(ch, ubuf++);
1654                 if (ret)
1655                         goto out;
1656                 read++;
1657                 cnt--;
1658         }
1659
1660         /* We either got finished input or we have to wait for another call. */
1661         if (isspace(ch) || !ch) {
1662                 parser->buffer[parser->idx] = 0;
1663                 parser->cont = false;
1664         } else if (parser->idx < parser->size - 1) {
1665                 parser->cont = true;
1666                 parser->buffer[parser->idx++] = ch;
1667                 /* Make sure the parsed string always terminates with '\0'. */
1668                 parser->buffer[parser->idx] = 0;
1669         } else {
1670                 ret = -EINVAL;
1671                 goto out;
1672         }
1673
1674         *ppos += read;
1675         ret = read;
1676
1677 out:
1678         return ret;
1679 }
1680
1681 /* TODO add a seq_buf_to_buffer() */
1682 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1683 {
1684         int len;
1685
1686         if (trace_seq_used(s) <= s->seq.readpos)
1687                 return -EBUSY;
1688
1689         len = trace_seq_used(s) - s->seq.readpos;
1690         if (cnt > len)
1691                 cnt = len;
1692         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1693
1694         s->seq.readpos += cnt;
1695         return cnt;
1696 }
1697
1698 unsigned long __read_mostly     tracing_thresh;
1699 static const struct file_operations tracing_max_lat_fops;
1700
1701 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1702         defined(CONFIG_FSNOTIFY)
1703
1704 static struct workqueue_struct *fsnotify_wq;
1705
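/*
 * A max-latency update can happen deep inside the scheduler, where
 * fsnotify() must not be called directly.  The notification is therefore
 * bounced through an irq_work and then a workqueue so that it runs in a
 * safe context (see latency_fsnotify() below).
 */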
1706 static void latency_fsnotify_workfn(struct work_struct *work)
1707 {
1708         struct trace_array *tr = container_of(work, struct trace_array,
1709                                               fsnotify_work);
1710         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1711 }
1712
1713 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1714 {
1715         struct trace_array *tr = container_of(iwork, struct trace_array,
1716                                               fsnotify_irqwork);
1717         queue_work(fsnotify_wq, &tr->fsnotify_work);
1718 }
1719
1720 static void trace_create_maxlat_file(struct trace_array *tr,
1721                                      struct dentry *d_tracer)
1722 {
1723         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1724         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1725         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1726                                               d_tracer, &tr->max_latency,
1727                                               &tracing_max_lat_fops);
1728 }
1729
1730 __init static int latency_fsnotify_init(void)
1731 {
1732         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1733                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1734         if (!fsnotify_wq) {
1735                 pr_err("Unable to allocate tr_max_lat_wq\n");
1736                 return -ENOMEM;
1737         }
1738         return 0;
1739 }
1740
1741 late_initcall_sync(latency_fsnotify_init);
1742
1743 void latency_fsnotify(struct trace_array *tr)
1744 {
1745         if (!fsnotify_wq)
1746                 return;
1747         /*
1748          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1749          * possible that we are called from __schedule() or do_idle(), which
1750          * could cause a deadlock.
1751          */
1752         irq_work_queue(&tr->fsnotify_irqwork);
1753 }
1754
1755 /*
1756  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1757  *  defined(CONFIG_FSNOTIFY)
1758  */
1759 #else
1760
1761 #define trace_create_maxlat_file(tr, d_tracer)                          \
1762         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1763                           &tr->max_latency, &tracing_max_lat_fops)
1764
1765 #endif
1766
1767 #ifdef CONFIG_TRACER_MAX_TRACE
1768 /*
1769  * Copy the new maximum trace into the separate maximum-trace
1770  * structure. (this way the maximum trace is permanently saved,
1771  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1772  */
1773 static void
1774 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1775 {
1776         struct array_buffer *trace_buf = &tr->array_buffer;
1777         struct array_buffer *max_buf = &tr->max_buffer;
1778         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1779         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1780
1781         max_buf->cpu = cpu;
1782         max_buf->time_start = data->preempt_timestamp;
1783
1784         max_data->saved_latency = tr->max_latency;
1785         max_data->critical_start = data->critical_start;
1786         max_data->critical_end = data->critical_end;
1787
1788         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1789         max_data->pid = tsk->pid;
1790         /*
1791          * If tsk == current, then use current_uid(), as that does not use
1792          * RCU. The irq tracer can be called out of RCU scope.
1793          */
1794         if (tsk == current)
1795                 max_data->uid = current_uid();
1796         else
1797                 max_data->uid = task_uid(tsk);
1798
1799         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1800         max_data->policy = tsk->policy;
1801         max_data->rt_priority = tsk->rt_priority;
1802
1803         /* record this task's comm */
1804         tracing_record_cmdline(tsk);
1805         latency_fsnotify(tr);
1806 }
1807
1808 /**
1809  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1810  * @tr: tracer
1811  * @tsk: the task with the latency
1812  * @cpu: The cpu that initiated the trace.
1813  * @cond_data: User data associated with a conditional snapshot
1814  *
1815  * Flip the buffers between the @tr and the max_tr and record information
1816  * about which task was the cause of this latency.
1817  */
1818 void
1819 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1820               void *cond_data)
1821 {
1822         if (tr->stop_count)
1823                 return;
1824
1825         WARN_ON_ONCE(!irqs_disabled());
1826
1827         if (!tr->allocated_snapshot) {
1828                 /* Only the nop tracer should hit this when disabling */
1829                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1830                 return;
1831         }
1832
1833         arch_spin_lock(&tr->max_lock);
1834
1835         /* Inherit the recordable setting from array_buffer */
1836         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1837                 ring_buffer_record_on(tr->max_buffer.buffer);
1838         else
1839                 ring_buffer_record_off(tr->max_buffer.buffer);
1840
1841 #ifdef CONFIG_TRACER_SNAPSHOT
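	/* A conditional snapshot's update() callback may veto the swap. */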
1842         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1843                 goto out_unlock;
1844 #endif
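	/* Swap the buffer pointers; the data itself is not copied. */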
1845         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1846
1847         __update_max_tr(tr, tsk, cpu);
1848
1849  out_unlock:
1850         arch_spin_unlock(&tr->max_lock);
1851 }
1852
1853 /**
1854  * update_max_tr_single - only copy one trace over, and reset the rest
1855  * @tr: tracer
1856  * @tsk: task with the latency
1857  * @cpu: the cpu of the buffer to copy.
1858  *
1859  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1860  */
1861 void
1862 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1863 {
1864         int ret;
1865
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870         if (!tr->allocated_snapshot) {
1871                 /* Only the nop tracer should hit this when disabling */
1872                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1873                 return;
1874         }
1875
1876         arch_spin_lock(&tr->max_lock);
1877
1878         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1879
1880         if (ret == -EBUSY) {
1881                 /*
1882                  * We failed to swap the buffer, either because a commit was
1883                  * taking place on this CPU or because a resize is in
1884                  * progress. We fail to record the latency, but write a
1885                  * message into the max trace buffer (no one writes directly
1886                  * to it) to flag that it failed.
1887                  */
1888                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1889                         "Failed to swap buffers due to commit or resize in progress\n");
1890         }
1891
1892         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1893
1894         __update_max_tr(tr, tsk, cpu);
1895         arch_spin_unlock(&tr->max_lock);
1896
1897         /* Any waiters on the old snapshot buffer need to wake up */
1898         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1899 }
1900 #endif /* CONFIG_TRACER_MAX_TRACE */
1901
1902 static int wait_on_pipe(struct trace_iterator *iter, int full)
1903 {
1904         int ret;
1905
1906         /* Iterators are static, they should be filled or empty */
1907         if (trace_buffer_iter(iter, iter->cpu_file))
1908                 return 0;
1909
1910         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1911
1912 #ifdef CONFIG_TRACER_MAX_TRACE
1913         /*
1914          * Make sure this is still the snapshot buffer, as if a snapshot were
1915          * to happen, this would now be the main buffer.
1916          */
1917         if (iter->snapshot)
1918                 iter->array_buffer = &iter->tr->max_buffer;
1919 #endif
1920         return ret;
1921 }
1922
1923 #ifdef CONFIG_FTRACE_STARTUP_TEST
1924 static bool selftests_can_run;
1925
1926 struct trace_selftests {
1927         struct list_head                list;
1928         struct tracer                   *type;
1929 };
1930
1931 static LIST_HEAD(postponed_selftests);
1932
1933 static int save_selftest(struct tracer *type)
1934 {
1935         struct trace_selftests *selftest;
1936
1937         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1938         if (!selftest)
1939                 return -ENOMEM;
1940
1941         selftest->type = type;
1942         list_add(&selftest->list, &postponed_selftests);
1943         return 0;
1944 }
1945
1946 static int run_tracer_selftest(struct tracer *type)
1947 {
1948         struct trace_array *tr = &global_trace;
1949         struct tracer *saved_tracer = tr->current_trace;
1950         int ret;
1951
1952         if (!type->selftest || tracing_selftest_disabled)
1953                 return 0;
1954
1955         /*
1956          * If a tracer registers early in boot up (before scheduling is
1957          * initialized and such), then do not run its selftests yet.
1958          * Instead, run it a little later in the boot process.
1959          */
1960         if (!selftests_can_run)
1961                 return save_selftest(type);
1962
1963         /*
1964          * Run a selftest on this tracer.
1965          * Here we reset the trace buffer, and set the current
1966          * tracer to be this tracer. The tracer can then run some
1967          * internal tracing to verify that everything is in order.
1968          * If we fail, we do not register this tracer.
1969          */
1970         tracing_reset_online_cpus(&tr->array_buffer);
1971
1972         tr->current_trace = type;
1973
1974 #ifdef CONFIG_TRACER_MAX_TRACE
1975         if (type->use_max_tr) {
1976                 /* If we expanded the buffers, make sure the max is expanded too */
1977                 if (ring_buffer_expanded)
1978                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1979                                            RING_BUFFER_ALL_CPUS);
1980                 tr->allocated_snapshot = true;
1981         }
1982 #endif
1983
1984         /* the test is responsible for initializing and enabling */
1985         pr_info("Testing tracer %s: ", type->name);
1986         ret = type->selftest(type, tr);
1987         /* the test is responsible for resetting too */
1988         tr->current_trace = saved_tracer;
1989         if (ret) {
1990                 printk(KERN_CONT "FAILED!\n");
1991                 /* Add the warning after printing 'FAILED' */
1992                 WARN_ON(1);
1993                 return -1;
1994         }
1995         /* Only reset on passing, to avoid touching corrupted buffers */
1996         tracing_reset_online_cpus(&tr->array_buffer);
1997
1998 #ifdef CONFIG_TRACER_MAX_TRACE
1999         if (type->use_max_tr) {
2000                 tr->allocated_snapshot = false;
2001
2002                 /* Shrink the max buffer again */
2003                 if (ring_buffer_expanded)
2004                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2005                                            RING_BUFFER_ALL_CPUS);
2006         }
2007 #endif
2008
2009         printk(KERN_CONT "PASSED\n");
2010         return 0;
2011 }
2012
2013 static __init int init_trace_selftests(void)
2014 {
2015         struct trace_selftests *p, *n;
2016         struct tracer *t, **last;
2017         int ret;
2018
2019         selftests_can_run = true;
2020
2021         mutex_lock(&trace_types_lock);
2022
2023         if (list_empty(&postponed_selftests))
2024                 goto out;
2025
2026         pr_info("Running postponed tracer tests:\n");
2027
2028         tracing_selftest_running = true;
2029         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2030                 /* This loop can take minutes when sanitizers are enabled, so
2031                  * let's make sure we allow RCU processing.
2032                  */
2033                 cond_resched();
2034                 ret = run_tracer_selftest(p->type);
2035                 /* If the test fails, then warn and remove from available_tracers */
2036                 if (ret < 0) {
2037                         WARN(1, "tracer: %s failed selftest, disabling\n",
2038                              p->type->name);
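			/* Unlink the failed tracer from the trace_types list. */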
2039                         last = &trace_types;
2040                         for (t = trace_types; t; t = t->next) {
2041                                 if (t == p->type) {
2042                                         *last = t->next;
2043                                         break;
2044                                 }
2045                                 last = &t->next;
2046                         }
2047                 }
2048                 list_del(&p->list);
2049                 kfree(p);
2050         }
2051         tracing_selftest_running = false;
2052
2053  out:
2054         mutex_unlock(&trace_types_lock);
2055
2056         return 0;
2057 }
2058 core_initcall(init_trace_selftests);
2059 #else
2060 static inline int run_tracer_selftest(struct tracer *type)
2061 {
2062         return 0;
2063 }
2064 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2065
2066 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2067
2068 static void __init apply_trace_boot_options(void);
2069
2070 /**
2071  * register_tracer - register a tracer with the ftrace system.
2072  * @type: the plugin for the tracer
2073  *
2074  * Register a new plugin tracer.
2075  */
2076 int __init register_tracer(struct tracer *type)
2077 {
2078         struct tracer *t;
2079         int ret = 0;
2080
2081         if (!type->name) {
2082                 pr_info("Tracer must have a name\n");
2083                 return -1;
2084         }
2085
2086         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2087                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2088                 return -1;
2089         }
2090
2091         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2092                 pr_warn("Can not register tracer %s due to lockdown\n",
2093                            type->name);
2094                 return -EPERM;
2095         }
2096
2097         mutex_lock(&trace_types_lock);
2098
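	/*
	 * Setting tracing_selftest_running suppresses stray trace output
	 * such as trace_printk() (see trace_vbprintk()) while the selftest
	 * below runs.
	 */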
2099         tracing_selftest_running = true;
2100
2101         for (t = trace_types; t; t = t->next) {
2102                 if (strcmp(type->name, t->name) == 0) {
2103                         /* already found */
2104                         pr_info("Tracer %s already registered\n",
2105                                 type->name);
2106                         ret = -1;
2107                         goto out;
2108                 }
2109         }
2110
2111         if (!type->set_flag)
2112                 type->set_flag = &dummy_set_flag;
2113         if (!type->flags) {
2114                 /* allocate a dummy tracer_flags */
2115                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2116                 if (!type->flags) {
2117                         ret = -ENOMEM;
2118                         goto out;
2119                 }
2120                 type->flags->val = 0;
2121                 type->flags->opts = dummy_tracer_opt;
2122         } else
2123                 if (!type->flags->opts)
2124                         type->flags->opts = dummy_tracer_opt;
2125
2126         /* store the tracer for __set_tracer_option */
2127         type->flags->trace = type;
2128
2129         ret = run_tracer_selftest(type);
2130         if (ret < 0)
2131                 goto out;
2132
2133         type->next = trace_types;
2134         trace_types = type;
2135         add_tracer_options(&global_trace, type);
2136
2137  out:
2138         tracing_selftest_running = false;
2139         mutex_unlock(&trace_types_lock);
2140
2141         if (ret || !default_bootup_tracer)
2142                 goto out_unlock;
2143
2144         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2145                 goto out_unlock;
2146
2147         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2148         /* Do we want this tracer to start on bootup? */
2149         tracing_set_tracer(&global_trace, type->name);
2150         default_bootup_tracer = NULL;
2151
2152         apply_trace_boot_options();
2153
2154         /* disable other selftests, since this will break them. */
2155         disable_tracing_selftest("running a tracer");
2156
2157  out_unlock:
2158         return ret;
2159 }
2160
2161 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2162 {
2163         struct trace_buffer *buffer = buf->buffer;
2164
2165         if (!buffer)
2166                 return;
2167
2168         ring_buffer_record_disable(buffer);
2169
2170         /* Make sure all commits have finished */
2171         synchronize_rcu();
2172         ring_buffer_reset_cpu(buffer, cpu);
2173
2174         ring_buffer_record_enable(buffer);
2175 }
2176
2177 void tracing_reset_online_cpus(struct array_buffer *buf)
2178 {
2179         struct trace_buffer *buffer = buf->buffer;
2180
2181         if (!buffer)
2182                 return;
2183
2184         ring_buffer_record_disable(buffer);
2185
2186         /* Make sure all commits have finished */
2187         synchronize_rcu();
2188
2189         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2190
2191         ring_buffer_reset_online_cpus(buffer);
2192
2193         ring_buffer_record_enable(buffer);
2194 }
2195
2196 /* Must have trace_types_lock held */
2197 void tracing_reset_all_online_cpus_unlocked(void)
2198 {
2199         struct trace_array *tr;
2200
2201         lockdep_assert_held(&trace_types_lock);
2202
2203         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2204                 if (!tr->clear_trace)
2205                         continue;
2206                 tr->clear_trace = false;
2207                 tracing_reset_online_cpus(&tr->array_buffer);
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209                 tracing_reset_online_cpus(&tr->max_buffer);
2210 #endif
2211         }
2212 }
2213
2214 void tracing_reset_all_online_cpus(void)
2215 {
2216         mutex_lock(&trace_types_lock);
2217         tracing_reset_all_online_cpus_unlocked();
2218         mutex_unlock(&trace_types_lock);
2219 }
2220
2221 /*
2222  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2223  * is the tgid last observed corresponding to pid=i.
2224  */
2225 static int *tgid_map;
2226
2227 /* The maximum valid index into tgid_map. */
2228 static size_t tgid_map_max;
2229
2230 #define SAVED_CMDLINES_DEFAULT 128
2231 #define NO_CMDLINE_MAP UINT_MAX
2232 /*
2233  * Preemption must be disabled before acquiring trace_cmdline_lock.
2234  * The various trace_arrays' max_lock must be acquired in a context
2235  * where interrupt is disabled.
2236  */
2237 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2238 struct saved_cmdlines_buffer {
2239         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2240         unsigned *map_cmdline_to_pid;
2241         unsigned cmdline_num;
2242         int cmdline_idx;
2243         char saved_cmdlines[];
2244 };
2245 static struct saved_cmdlines_buffer *savedcmd;
2246
2247 static inline char *get_saved_cmdlines(int idx)
2248 {
2249         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2250 }
2251
2252 static inline void set_cmdline(int idx, const char *cmdline)
2253 {
2254         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2255 }
2256
2257 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2258 {
2259         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2260
2261         kfree(s->map_cmdline_to_pid);
2262         kmemleak_free(s);
2263         free_pages((unsigned long)s, order);
2264 }
2265
2266 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2267 {
2268         struct saved_cmdlines_buffer *s;
2269         struct page *page;
2270         int orig_size, size;
2271         int order;
2272
2273         /* Figure out how much is needed to hold the given number of cmdlines */
2274         orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2275         order = get_order(orig_size);
2276         size = 1 << (order + PAGE_SHIFT);
2277         page = alloc_pages(GFP_KERNEL, order);
2278         if (!page)
2279                 return NULL;
2280
2281         s = page_address(page);
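	/* Memory from alloc_pages() is not tracked by kmemleak; register it. */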
2282         kmemleak_alloc(s, size, 1, GFP_KERNEL);
2283         memset(s, 0, sizeof(*s));
2284
2285         /* Round up to actual allocation */
2286         val = (size - sizeof(*s)) / TASK_COMM_LEN;
2287         s->cmdline_num = val;
2288
2289         s->map_cmdline_to_pid = kmalloc_array(val,
2290                                               sizeof(*s->map_cmdline_to_pid),
2291                                               GFP_KERNEL);
2292         if (!s->map_cmdline_to_pid) {
2293                 free_saved_cmdlines_buffer(s);
2294                 return NULL;
2295         }
2296
2297         s->cmdline_idx = 0;
2298         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2299                sizeof(s->map_pid_to_cmdline));
2300         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2301                val * sizeof(*s->map_cmdline_to_pid));
2302
2303         return s;
2304 }
2305
2306 static int trace_create_savedcmd(void)
2307 {
2308         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2309
2310         return savedcmd ? 0 : -ENOMEM;
2311 }
2312
2313 int is_tracing_stopped(void)
2314 {
2315         return global_trace.stop_count;
2316 }
2317
2318 static void tracing_start_tr(struct trace_array *tr)
2319 {
2320         struct trace_buffer *buffer;
2321         unsigned long flags;
2322
2323         if (tracing_disabled)
2324                 return;
2325
2326         raw_spin_lock_irqsave(&tr->start_lock, flags);
2327         if (--tr->stop_count) {
2328                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2329                         /* Someone screwed up their debugging */
2330                         tr->stop_count = 0;
2331                 }
2332                 goto out;
2333         }
2334
2335         /* Prevent the buffers from switching */
2336         arch_spin_lock(&tr->max_lock);
2337
2338         buffer = tr->array_buffer.buffer;
2339         if (buffer)
2340                 ring_buffer_record_enable(buffer);
2341
2342 #ifdef CONFIG_TRACER_MAX_TRACE
2343         buffer = tr->max_buffer.buffer;
2344         if (buffer)
2345                 ring_buffer_record_enable(buffer);
2346 #endif
2347
2348         arch_spin_unlock(&tr->max_lock);
2349
2350  out:
2351         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2352 }
2353
2354 /**
2355  * tracing_start - quick start of the tracer
2356  *
2357  * If tracing is enabled but was stopped by tracing_stop,
2358  * this will start the tracer back up.
2359  */
2360 void tracing_start(void)
2361 {
2363         return tracing_start_tr(&global_trace);
2364 }
2365
2366 static void tracing_stop_tr(struct trace_array *tr)
2367 {
2368         struct trace_buffer *buffer;
2369         unsigned long flags;
2370
2371         raw_spin_lock_irqsave(&tr->start_lock, flags);
2372         if (tr->stop_count++)
2373                 goto out;
2374
2375         /* Prevent the buffers from switching */
2376         arch_spin_lock(&tr->max_lock);
2377
2378         buffer = tr->array_buffer.buffer;
2379         if (buffer)
2380                 ring_buffer_record_disable(buffer);
2381
2382 #ifdef CONFIG_TRACER_MAX_TRACE
2383         buffer = tr->max_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386 #endif
2387
2388         arch_spin_unlock(&tr->max_lock);
2389
2390  out:
2391         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2392 }
2393
2394 /**
2395  * tracing_stop - quick stop of the tracer
2396  *
2397  * Light weight way to stop tracing. Use in conjunction with
2398  * tracing_start.
2399  */
2400 void tracing_stop(void)
2401 {
2402         return tracing_stop_tr(&global_trace);
2403 }
2404
2405 static int trace_save_cmdline(struct task_struct *tsk)
2406 {
2407         unsigned tpid, idx;
2408
2409         /* treat recording of idle task as a success */
2410         if (!tsk->pid)
2411                 return 1;
2412
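	/*
	 * map_pid_to_cmdline[] only covers PID_MAX_DEFAULT entries, so larger
	 * pids simply alias into the table; good enough for a best-effort
	 * comm cache.
	 */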
2413         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2414
2415         /*
2416          * It's not the end of the world if we don't get the lock,
2417          * but we also don't want to spin, nor do we want to disable
2418          * interrupts, so if we miss here, then better luck next time.
2419          *
2420          * This is called from within the scheduler and from wake ups, so
2421          * interrupts should already be disabled and the run queue lock
2422          * held.
2423          */
2424         lockdep_assert_preemption_disabled();
2425         if (!arch_spin_trylock(&trace_cmdline_lock))
2426                 return 0;
2427
2428         idx = savedcmd->map_pid_to_cmdline[tpid];
2429         if (idx == NO_CMDLINE_MAP) {
2430                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2431
2432                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2433                 savedcmd->cmdline_idx = idx;
2434         }
2435
2436         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2437         set_cmdline(idx, tsk->comm);
2438
2439         arch_spin_unlock(&trace_cmdline_lock);
2440
2441         return 1;
2442 }
2443
2444 static void __trace_find_cmdline(int pid, char comm[])
2445 {
2446         unsigned map;
2447         int tpid;
2448
2449         if (!pid) {
2450                 strcpy(comm, "<idle>");
2451                 return;
2452         }
2453
2454         if (WARN_ON_ONCE(pid < 0)) {
2455                 strcpy(comm, "<XXX>");
2456                 return;
2457         }
2458
2459         tpid = pid & (PID_MAX_DEFAULT - 1);
2460         map = savedcmd->map_pid_to_cmdline[tpid];
2461         if (map != NO_CMDLINE_MAP) {
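		/* Verify the reverse mapping to reject stale or aliased entries. */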
2462                 tpid = savedcmd->map_cmdline_to_pid[map];
2463                 if (tpid == pid) {
2464                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2465                         return;
2466                 }
2467         }
2468         strcpy(comm, "<...>");
2469 }
2470
2471 void trace_find_cmdline(int pid, char comm[])
2472 {
2473         preempt_disable();
2474         arch_spin_lock(&trace_cmdline_lock);
2475
2476         __trace_find_cmdline(pid, comm);
2477
2478         arch_spin_unlock(&trace_cmdline_lock);
2479         preempt_enable();
2480 }
2481
2482 static int *trace_find_tgid_ptr(int pid)
2483 {
2484         /*
2485          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2486          * if we observe a non-NULL tgid_map then we also observe the correct
2487          * tgid_map_max.
2488          */
2489         int *map = smp_load_acquire(&tgid_map);
2490
2491         if (unlikely(!map || pid > tgid_map_max))
2492                 return NULL;
2493
2494         return &map[pid];
2495 }
2496
2497 int trace_find_tgid(int pid)
2498 {
2499         int *ptr = trace_find_tgid_ptr(pid);
2500
2501         return ptr ? *ptr : 0;
2502 }
2503
2504 static int trace_save_tgid(struct task_struct *tsk)
2505 {
2506         int *ptr;
2507
2508         /* treat recording of idle task as a success */
2509         if (!tsk->pid)
2510                 return 1;
2511
2512         ptr = trace_find_tgid_ptr(tsk->pid);
2513         if (!ptr)
2514                 return 0;
2515
2516         *ptr = tsk->tgid;
2517         return 1;
2518 }
2519
2520 static bool tracing_record_taskinfo_skip(int flags)
2521 {
2522         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2523                 return true;
2524         if (!__this_cpu_read(trace_taskinfo_save))
2525                 return true;
2526         return false;
2527 }
2528
2529 /**
2530  * tracing_record_taskinfo - record the task info of a task
2531  *
2532  * @task:  task to record
2533  * @flags: TRACE_RECORD_CMDLINE for recording comm
2534  *         TRACE_RECORD_TGID for recording tgid
2535  */
2536 void tracing_record_taskinfo(struct task_struct *task, int flags)
2537 {
2538         bool done;
2539
2540         if (tracing_record_taskinfo_skip(flags))
2541                 return;
2542
2543         /*
2544          * Record as much task information as possible. If some fail, continue
2545          * to try to record the others.
2546          */
2547         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2548         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2549
2550         /* If recording any information failed, retry again soon. */
2551         if (!done)
2552                 return;
2553
2554         __this_cpu_write(trace_taskinfo_save, false);
2555 }
2556
2557 /**
2558  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2559  *
2560  * @prev: previous task during sched_switch
2561  * @next: next task during sched_switch
2562  * @flags: TRACE_RECORD_CMDLINE for recording comm
2563  *         TRACE_RECORD_TGID for recording tgid
2564  */
2565 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2566                                           struct task_struct *next, int flags)
2567 {
2568         bool done;
2569
2570         if (tracing_record_taskinfo_skip(flags))
2571                 return;
2572
2573         /*
2574          * Record as much task information as possible. If some fail, continue
2575          * to try to record the others.
2576          */
2577         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2578         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2579         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2580         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2581
2582         /* If recording any information failed, retry again soon. */
2583         if (!done)
2584                 return;
2585
2586         __this_cpu_write(trace_taskinfo_save, false);
2587 }
2588
2589 /* Helpers to record a specific task information */
2590 void tracing_record_cmdline(struct task_struct *task)
2591 {
2592         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2593 }
2594
2595 void tracing_record_tgid(struct task_struct *task)
2596 {
2597         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2598 }
2599
2600 /*
2601  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2602  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2603  * simplifies those functions and keeps them in sync.
2604  */
2605 enum print_line_t trace_handle_return(struct trace_seq *s)
2606 {
2607         return trace_seq_has_overflowed(s) ?
2608                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2609 }
2610 EXPORT_SYMBOL_GPL(trace_handle_return);
2611
2612 void
2613 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2614                              unsigned long flags, int pc)
2615 {
2616         struct task_struct *tsk = current;
2617
2618         entry->preempt_count            = pc & 0xff;
2619         entry->pid                      = (tsk) ? tsk->pid : 0;
2620         entry->type                     = type;
2621         entry->flags =
2622 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2623                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2624 #else
2625                 TRACE_FLAG_IRQS_NOSUPPORT |
2626 #endif
2627                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2628                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2629                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2630                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2631                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2632 }
2633 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2634
2635 struct ring_buffer_event *
2636 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2637                           int type,
2638                           unsigned long len,
2639                           unsigned long flags, int pc)
2640 {
2641         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2642 }
2643
2644 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2645 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2646 static int trace_buffered_event_ref;
2647
2648 /**
2649  * trace_buffered_event_enable - enable buffering events
2650  *
2651  * When events are being filtered, it is quicker to use a temporary
2652  * buffer to write the event data into if there's a likely chance
2653  * that it will not be committed. Discarding an event from the ring
2654  * buffer is not as fast as committing it, and is much slower than
2655  * copying from a temporary buffer and committing that.
2656  *
2657  * When an event is to be filtered, allocate per-cpu buffers to write
2658  * the event data into. If the event is then filtered and discarded,
2659  * it is simply dropped; otherwise, the entire data is committed in
2660  * one shot.
2661  */
2662 void trace_buffered_event_enable(void)
2663 {
2664         struct ring_buffer_event *event;
2665         struct page *page;
2666         int cpu;
2667
2668         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2669
2670         if (trace_buffered_event_ref++)
2671                 return;
2672
2673         for_each_tracing_cpu(cpu) {
2674                 page = alloc_pages_node(cpu_to_node(cpu),
2675                                         GFP_KERNEL | __GFP_NORETRY, 0);
2676                 /* This is just an optimization and can handle failures */
2677                 if (!page) {
2678                         pr_err("Failed to allocate event buffer\n");
2679                         break;
2680                 }
2681
2682                 event = page_address(page);
2683                 memset(event, 0, sizeof(*event));
2684
2685                 per_cpu(trace_buffered_event, cpu) = event;
2686
2687                 preempt_disable();
2688                 if (cpu == smp_processor_id() &&
2689                     __this_cpu_read(trace_buffered_event) !=
2690                     per_cpu(trace_buffered_event, cpu))
2691                         WARN_ON_ONCE(1);
2692                 preempt_enable();
2693         }
2694 }
2695
2696 static void enable_trace_buffered_event(void *data)
2697 {
2698         /* Probably not needed, but do it anyway */
2699         smp_rmb();
2700         this_cpu_dec(trace_buffered_event_cnt);
2701 }
2702
2703 static void disable_trace_buffered_event(void *data)
2704 {
2705         this_cpu_inc(trace_buffered_event_cnt);
2706 }
2707
2708 /**
2709  * trace_buffered_event_disable - disable buffering events
2710  *
2711  * When a filter is removed, it is faster to not use the buffered
2712  * events, and to commit directly into the ring buffer. Free up
2713  * the temp buffers when there are no more users. This requires
2714  * special synchronization with current events.
2715  */
2716 void trace_buffered_event_disable(void)
2717 {
2718         int cpu;
2719
2720         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2721
2722         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2723                 return;
2724
2725         if (--trace_buffered_event_ref)
2726                 return;
2727
2728         /* For each CPU, set the buffer as used. */
2729         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2730                          NULL, true);
2731
2732         /* Wait for all current users to finish */
2733         synchronize_rcu();
2734
2735         for_each_tracing_cpu(cpu) {
2736                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2737                 per_cpu(trace_buffered_event, cpu) = NULL;
2738         }
2739
2740         /*
2741          * Wait for any CPU that started checking whether it can use its
2742          * event buffer only after the previous synchronize_rcu() call and
2743          * may still hold a pointer read from trace_buffered_event. It must
2744          * not see the cleared trace_buffered_event_cnt, or it could wrongly
2745          * decide to use the pointed-to buffer, which is now freed.
2746          */
2747         synchronize_rcu();
2748
2749         /* For each CPU, relinquish the buffer */
2750         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2751                          true);
2752 }
2753
2754 static struct trace_buffer *temp_buffer;
2755
2756 struct ring_buffer_event *
2757 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2758                           struct trace_event_file *trace_file,
2759                           int type, unsigned long len,
2760                           unsigned long flags, int pc)
2761 {
2762         struct ring_buffer_event *entry;
2763         int val;
2764
2765         *current_rb = trace_file->tr->array_buffer.buffer;
2766
2767         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2768              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2769             (entry = this_cpu_read(trace_buffered_event))) {
2770                 /* Try to use the per cpu buffer first */
2771                 val = this_cpu_inc_return(trace_buffered_event_cnt);
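		/*
		 * Only the outermost event on this CPU (val == 1) may use the
		 * single per-cpu page, and only if the event fits within it.
		 */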
2772                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2773                         trace_event_setup(entry, type, flags, pc);
2774                         entry->array[0] = len;
2775                         return entry;
2776                 }
2777                 this_cpu_dec(trace_buffered_event_cnt);
2778         }
2779
2780         entry = __trace_buffer_lock_reserve(*current_rb,
2781                                             type, len, flags, pc);
2782         /*
2783          * If tracing is off, but we have triggers enabled,
2784          * we still need to look at the event data. Use the temp_buffer
2785          * to store the trace event for the trigger to use. It's recursion
2786          * safe and will not be recorded anywhere.
2787          */
2788         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2789                 *current_rb = temp_buffer;
2790                 entry = __trace_buffer_lock_reserve(*current_rb,
2791                                                     type, len, flags, pc);
2792         }
2793         return entry;
2794 }
2795 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2796
2797 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2798 static DEFINE_MUTEX(tracepoint_printk_mutex);
2799
2800 static void output_printk(struct trace_event_buffer *fbuffer)
2801 {
2802         struct trace_event_call *event_call;
2803         struct trace_event_file *file;
2804         struct trace_event *event;
2805         unsigned long flags;
2806         struct trace_iterator *iter = tracepoint_print_iter;
2807
2808         /* We should never get here if iter is NULL */
2809         if (WARN_ON_ONCE(!iter))
2810                 return;
2811
2812         event_call = fbuffer->trace_file->event_call;
2813         if (!event_call || !event_call->event.funcs ||
2814             !event_call->event.funcs->trace)
2815                 return;
2816
2817         file = fbuffer->trace_file;
2818         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2819             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2820              !filter_match_preds(file->filter, fbuffer->entry)))
2821                 return;
2822
2823         event = &fbuffer->trace_file->event_call->event;
2824
2825         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2826         trace_seq_init(&iter->seq);
2827         iter->ent = fbuffer->entry;
2828         event_call->event.funcs->trace(iter, 0, event);
2829         trace_seq_putc(&iter->seq, 0);
2830         printk("%s", iter->seq.buffer);
2831
2832         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2833 }
2834
2835 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2836                              void *buffer, size_t *lenp,
2837                              loff_t *ppos)
2838 {
2839         int save_tracepoint_printk;
2840         int ret;
2841
2842         mutex_lock(&tracepoint_printk_mutex);
2843         save_tracepoint_printk = tracepoint_printk;
2844
2845         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2846
2847         /*
2848          * This will force exiting early, as tracepoint_printk
2849          * is always zero when tracepoint_print_iter is not allocated.
2850          */
2851         if (!tracepoint_print_iter)
2852                 tracepoint_printk = 0;
2853
2854         if (save_tracepoint_printk == tracepoint_printk)
2855                 goto out;
2856
2857         if (tracepoint_printk)
2858                 static_key_enable(&tracepoint_printk_key.key);
2859         else
2860                 static_key_disable(&tracepoint_printk_key.key);
2861
2862  out:
2863         mutex_unlock(&tracepoint_printk_mutex);
2864
2865         return ret;
2866 }
2867
2868 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2869 {
2870         if (static_key_false(&tracepoint_printk_key.key))
2871                 output_printk(fbuffer);
2872
2873         if (static_branch_unlikely(&trace_event_exports_enabled))
2874                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2875         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2876                                     fbuffer->event, fbuffer->entry,
2877                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2878 }
2879 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2880
2881 /*
2882  * Skip 3:
2883  *
2884  *   trace_buffer_unlock_commit_regs()
2885  *   trace_event_buffer_commit()
2886  *   trace_event_raw_event_xxx()
2887  */
2888 # define STACK_SKIP 3
2889
2890 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2891                                      struct trace_buffer *buffer,
2892                                      struct ring_buffer_event *event,
2893                                      unsigned long flags, int pc,
2894                                      struct pt_regs *regs)
2895 {
2896         __buffer_unlock_commit(buffer, event);
2897
2898         /*
2899          * If regs is not set, then skip the necessary functions.
2900          * Note, we can still get here via blktrace, wakeup tracer
2901          * and mmiotrace, but that's ok if they lose a function or
2902          * two. They are not that meaningful.
2903          */
2904         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2905         ftrace_trace_userstack(tr, buffer, flags, pc);
2906 }
2907
2908 /*
2909  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2910  */
2911 void
2912 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2913                                    struct ring_buffer_event *event)
2914 {
2915         __buffer_unlock_commit(buffer, event);
2916 }
2917
2918 void
2919 trace_function(struct trace_array *tr,
2920                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2921                int pc)
2922 {
2923         struct trace_event_call *call = &event_function;
2924         struct trace_buffer *buffer = tr->array_buffer.buffer;
2925         struct ring_buffer_event *event;
2926         struct ftrace_entry *entry;
2927
2928         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2929                                             flags, pc);
2930         if (!event)
2931                 return;
2932         entry   = ring_buffer_event_data(event);
2933         entry->ip                       = ip;
2934         entry->parent_ip                = parent_ip;
2935
2936         if (!call_filter_check_discard(call, entry, buffer, event)) {
2937                 if (static_branch_unlikely(&trace_function_exports_enabled))
2938                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2939                 __buffer_unlock_commit(buffer, event);
2940         }
2941 }
2942
2943 #ifdef CONFIG_STACKTRACE
2944
2945 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2946 #define FTRACE_KSTACK_NESTING   4
2947
2948 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2949
2950 struct ftrace_stack {
2951         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2952 };
2953
2954
2955 struct ftrace_stacks {
2956         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2957 };
2958
2959 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2960 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2961
2962 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2963                                  unsigned long flags,
2964                                  int skip, int pc, struct pt_regs *regs)
2965 {
2966         struct trace_event_call *call = &event_kernel_stack;
2967         struct ring_buffer_event *event;
2968         unsigned int size, nr_entries;
2969         struct ftrace_stack *fstack;
2970         struct stack_entry *entry;
2971         int stackidx;
2972
2973         /*
2974          * Add one, for this function and the call to stack_trace_save().
2975          * If regs is set, then these functions will not be in the way.
2976          */
2977 #ifndef CONFIG_UNWINDER_ORC
2978         if (!regs)
2979                 skip++;
2980 #endif
2981
2982         preempt_disable_notrace();
2983
2984         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2985
2986         /* This should never happen. If it does, yell once and skip */
2987         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2988                 goto out;
2989
2990         /*
2991          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2992          * interrupt will either see the value before or after the
2993          * increment. If the interrupt happens before the increment, it
2994          * will have restored the counter when it returns. We just need a
2995          * barrier to keep gcc from moving things around.
2996          */
2997         barrier();
2998
2999         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3000         size = ARRAY_SIZE(fstack->calls);
3001
3002         if (regs) {
3003                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3004                                                    size, skip);
3005         } else {
3006                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3007         }
3008
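	/* Only reserve ring buffer space for the entries actually saved. */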
3009         size = nr_entries * sizeof(unsigned long);
3010         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3011                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3012                                     flags, pc);
3013         if (!event)
3014                 goto out;
3015         entry = ring_buffer_event_data(event);
3016
3017         memcpy(&entry->caller, fstack->calls, size);
3018         entry->size = nr_entries;
3019
3020         if (!call_filter_check_discard(call, entry, buffer, event))
3021                 __buffer_unlock_commit(buffer, event);
3022
3023  out:
3024         /* Again, don't let gcc optimize things here */
3025         barrier();
3026         __this_cpu_dec(ftrace_stack_reserve);
3027         preempt_enable_notrace();
3028
3029 }
3030
3031 static inline void ftrace_trace_stack(struct trace_array *tr,
3032                                       struct trace_buffer *buffer,
3033                                       unsigned long flags,
3034                                       int skip, int pc, struct pt_regs *regs)
3035 {
3036         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3037                 return;
3038
3039         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3040 }
3041
3042 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3043                    int pc)
3044 {
3045         struct trace_buffer *buffer = tr->array_buffer.buffer;
3046
3047         if (rcu_is_watching()) {
3048                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3049                 return;
3050         }
3051
3052         /*
3053          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3054          * but if the above rcu_is_watching() failed, then the NMI
3055          * triggered someplace critical, and rcu_irq_enter() should
3056          * not be called from NMI.
3057          */
3058         if (unlikely(in_nmi()))
3059                 return;
3060
3061         rcu_irq_enter_irqson();
3062         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3063         rcu_irq_exit_irqson();
3064 }
3065
3066 /**
3067  * trace_dump_stack - record a stack back trace in the trace buffer
3068  * @skip: Number of functions to skip (helper handlers)
3069  */
3070 void trace_dump_stack(int skip)
3071 {
3072         unsigned long flags;
3073
3074         if (tracing_disabled || tracing_selftest_running)
3075                 return;
3076
3077         local_save_flags(flags);
3078
3079 #ifndef CONFIG_UNWINDER_ORC
3080         /* Skip 1 to skip this function. */
3081         skip++;
3082 #endif
3083         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3084                              flags, skip, preempt_count(), NULL);
3085 }
3086 EXPORT_SYMBOL_GPL(trace_dump_stack);
3087
3088 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3089 static DEFINE_PER_CPU(int, user_stack_count);
3090
3091 static void
3092 ftrace_trace_userstack(struct trace_array *tr,
3093                        struct trace_buffer *buffer, unsigned long flags, int pc)
3094 {
3095         struct trace_event_call *call = &event_user_stack;
3096         struct ring_buffer_event *event;
3097         struct userstack_entry *entry;
3098
3099         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3100                 return;
3101
3102         /*
3103          * NMIs cannot handle page faults, even with fixups.
3104          * Saving the user stack can (and often does) fault.
3105          */
3106         if (unlikely(in_nmi()))
3107                 return;
3108
3109         /*
3110          * prevent recursion, since the user stack tracing may
3111          * trigger other kernel events.
3112          */
3113         preempt_disable();
3114         if (__this_cpu_read(user_stack_count))
3115                 goto out;
3116
3117         __this_cpu_inc(user_stack_count);
3118
3119         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3120                                             sizeof(*entry), flags, pc);
3121         if (!event)
3122                 goto out_drop_count;
3123         entry   = ring_buffer_event_data(event);
3124
3125         entry->tgid             = current->tgid;
3126         memset(&entry->caller, 0, sizeof(entry->caller));
3127
3128         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3129         if (!call_filter_check_discard(call, entry, buffer, event))
3130                 __buffer_unlock_commit(buffer, event);
3131
3132  out_drop_count:
3133         __this_cpu_dec(user_stack_count);
3134  out:
3135         preempt_enable();
3136 }
3137 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3138 static void ftrace_trace_userstack(struct trace_array *tr,
3139                                    struct trace_buffer *buffer,
3140                                    unsigned long flags, int pc)
3141 {
3142 }
3143 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3144
3145 #endif /* CONFIG_STACKTRACE */
3146
3147 /* created for use with alloc_percpu */
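/*
 * The four buffers cover nested contexts (task, softirq, hardirq, NMI),
 * matching the nesting check in get_trace_buf() below.
 */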
3148 struct trace_buffer_struct {
3149         int nesting;
3150         char buffer[4][TRACE_BUF_SIZE];
3151 };
3152
3153 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3154
3155 /*
3156  * This allows for lockless recording. If we're nested too deeply, then
3157  * this returns NULL.
3158  */
3159 static char *get_trace_buf(void)
3160 {
3161         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3162
3163         if (!trace_percpu_buffer || buffer->nesting >= 4)
3164                 return NULL;
3165
3166         buffer->nesting++;
3167
3168         /* Interrupts must see nesting incremented before we use the buffer */
3169         barrier();
3170         return &buffer->buffer[buffer->nesting - 1][0];
3171 }
3172
3173 static void put_trace_buf(void)
3174 {
3175         /* Don't let the decrement of nesting leak before this */
3176         barrier();
3177         this_cpu_dec(trace_percpu_buffer->nesting);
3178 }
3179
3180 static int alloc_percpu_trace_buffer(void)
3181 {
3182         struct trace_buffer_struct __percpu *buffers;
3183
3184         if (trace_percpu_buffer)
3185                 return 0;
3186
3187         buffers = alloc_percpu(struct trace_buffer_struct);
3188         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3189                 return -ENOMEM;
3190
3191         trace_percpu_buffer = buffers;
3192         return 0;
3193 }
3194
3195 static int buffers_allocated;
3196
3197 void trace_printk_init_buffers(void)
3198 {
3199         if (buffers_allocated)
3200                 return;
3201
3202         if (alloc_percpu_trace_buffer())
3203                 return;
3204
3205         /* trace_printk() is for debug use only. Don't use it in production. */
3206
3207         pr_warn("\n");
3208         pr_warn("**********************************************************\n");
3209         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3210         pr_warn("**                                                      **\n");
3211         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3212         pr_warn("**                                                      **\n");
3213         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3214         pr_warn("** unsafe for production use.                           **\n");
3215         pr_warn("**                                                      **\n");
3216         pr_warn("** If you see this message and you are not debugging    **\n");
3217         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3218         pr_warn("**                                                      **\n");
3219         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3220         pr_warn("**********************************************************\n");
3221
3222         /* Expand the buffers to set size */
3223         tracing_update_buffers();
3224
3225         buffers_allocated = 1;
3226
3227         /*
3228          * trace_printk_init_buffers() can be called by modules.
3229          * If that happens, then we need to start cmdline recording
3230          * directly here. If global_trace.array_buffer.buffer is already
3231          * allocated here, then this was called by module code.
3232          */
3233         if (global_trace.array_buffer.buffer)
3234                 tracing_start_cmdline_record();
3235 }
3236 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3237
3238 void trace_printk_start_comm(void)
3239 {
3240         /* Start tracing comms if trace printk is set */
3241         if (!buffers_allocated)
3242                 return;
3243         tracing_start_cmdline_record();
3244 }
3245
3246 static void trace_printk_start_stop_comm(int enabled)
3247 {
3248         if (!buffers_allocated)
3249                 return;
3250
3251         if (enabled)
3252                 tracing_start_cmdline_record();
3253         else
3254                 tracing_stop_cmdline_record();
3255 }
3256
3257 /**
3258  * trace_vbprintk - write binary msg to tracing buffer
3259  * @ip:    The address of the caller
3260  * @fmt:   The string format to write to the buffer
3261  * @args:  Arguments for @fmt
3262  */
3263 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3264 {
3265         struct trace_event_call *call = &event_bprint;
3266         struct ring_buffer_event *event;
3267         struct trace_buffer *buffer;
3268         struct trace_array *tr = &global_trace;
3269         struct bprint_entry *entry;
3270         unsigned long flags;
3271         char *tbuffer;
3272         int len = 0, size, pc;
3273
3274         if (unlikely(tracing_selftest_running || tracing_disabled))
3275                 return 0;
3276
3277         /* Don't pollute graph traces with trace_vprintk internals */
3278         pause_graph_tracing();
3279
3280         pc = preempt_count();
3281         preempt_disable_notrace();
3282
3283         tbuffer = get_trace_buf();
3284         if (!tbuffer) {
3285                 len = 0;
3286                 goto out_nobuffer;
3287         }
3288
3289         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3290
3291         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3292                 goto out_put;
3293
3294         local_save_flags(flags);
3295         size = sizeof(*entry) + sizeof(u32) * len;
3296         buffer = tr->array_buffer.buffer;
3297         ring_buffer_nest_start(buffer);
3298         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3299                                             flags, pc);
3300         if (!event)
3301                 goto out;
3302         entry = ring_buffer_event_data(event);
3303         entry->ip                       = ip;
3304         entry->fmt                      = fmt;
3305
3306         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3307         if (!call_filter_check_discard(call, entry, buffer, event)) {
3308                 __buffer_unlock_commit(buffer, event);
3309                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3310         }
3311
3312 out:
3313         ring_buffer_nest_end(buffer);
3314 out_put:
3315         put_trace_buf();
3316
3317 out_nobuffer:
3318         preempt_enable_notrace();
3319         unpause_graph_tracing();
3320
3321         return len;
3322 }
3323 EXPORT_SYMBOL_GPL(trace_vbprintk);
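
/*
 * Usage note (a sketch, not taken from this file): trace_vbprintk() is not
 * normally called directly.  It is reached through the trace_printk() macro
 * when the format string is a compile-time constant, e.g.
 *
 *	trace_printk("entered %s, flags=%lx\n", __func__, flags);
 *
 * which records only a pointer to the format plus the binary arguments
 * (encoded by vbin_printf() above), deferring the expensive string
 * formatting to read time.
 */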
3324
3325 __printf(3, 0)
3326 static int
3327 __trace_array_vprintk(struct trace_buffer *buffer,
3328                       unsigned long ip, const char *fmt, va_list args)
3329 {
3330         struct trace_event_call *call = &event_print;
3331         struct ring_buffer_event *event;
3332         int len = 0, size, pc;
3333         struct print_entry *entry;
3334         unsigned long flags;
3335         char *tbuffer;
3336
3337         if (tracing_disabled || tracing_selftest_running)
3338                 return 0;
3339
3340         /* Don't pollute graph traces with trace_vprintk internals */
3341         pause_graph_tracing();
3342
3343         pc = preempt_count();
3344         preempt_disable_notrace();
3345
3346
3347         tbuffer = get_trace_buf();
3348         if (!tbuffer) {
3349                 len = 0;
3350                 goto out_nobuffer;
3351         }
3352
3353         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3354
3355         local_save_flags(flags);
3356         size = sizeof(*entry) + len + 1;
3357         ring_buffer_nest_start(buffer);
3358         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3359                                             flags, pc);
3360         if (!event)
3361                 goto out;
3362         entry = ring_buffer_event_data(event);
3363         entry->ip = ip;
3364
3365         memcpy(&entry->buf, tbuffer, len + 1);
3366         if (!call_filter_check_discard(call, entry, buffer, event)) {
3367                 __buffer_unlock_commit(buffer, event);
3368                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3369         }
3370
3371 out:
3372         ring_buffer_nest_end(buffer);
3373         put_trace_buf();
3374
3375 out_nobuffer:
3376         preempt_enable_notrace();
3377         unpause_graph_tracing();
3378
3379         return len;
3380 }
3381
3382 __printf(3, 0)
3383 int trace_array_vprintk(struct trace_array *tr,
3384                         unsigned long ip, const char *fmt, va_list args)
3385 {
3386         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3387 }
3388
3389 /**
3390  * trace_array_printk - Print a message to a specific instance
3391  * @tr: The instance trace_array descriptor
3392  * @ip: The instruction pointer that this is called from.
3393  * @fmt: The format to print (printf format)
3394  *
3395  * If a subsystem sets up its own instance, it may printk strings into
3396  * its own tracing instance buffer using this function. Note, this
3397  * function will not write into the top level buffer (use trace_printk()
3398  * for that), as the top level buffer should only contain events that
3399  * can be individually disabled. trace_printk() is only meant for
3400  * debugging a kernel, and should never be incorporated into normal
3401  * use.
3402  *
3403  * trace_array_printk() can be used, as it will not add noise to the
3404  * top level tracing buffer.
3405  *
3406  * Note, trace_array_init_printk() must be called on @tr before this
3407  * can be used.
3408  */
3409 __printf(3, 0)
3410 int trace_array_printk(struct trace_array *tr,
3411                        unsigned long ip, const char *fmt, ...)
3412 {
3413         int ret;
3414         va_list ap;
3415
3416         if (!tr)
3417                 return -ENOENT;
3418
3419         /* This is only allowed for created instances */
3420         if (tr == &global_trace)
3421                 return 0;
3422
3423         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3424                 return 0;
3425
3426         va_start(ap, fmt);
3427         ret = trace_array_vprintk(tr, ip, fmt, ap);
3428         va_end(ap);
3429         return ret;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_array_printk);
3432
3433 /**
3434  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3435  * @tr: The trace array to initialize the buffers for
3436  *
3437  * As trace_array_printk() only writes into instances, calls to it are
3438  * OK to keep in the kernel (unlike trace_printk()). This needs to be called
3439  * before trace_array_printk() can be used on a trace_array.
3440  */
3441 int trace_array_init_printk(struct trace_array *tr)
3442 {
3443         if (!tr)
3444                 return -ENOENT;
3445
3446         /* This is only allowed for created instances */
3447         if (tr == &global_trace)
3448                 return -EINVAL;
3449
3450         return alloc_percpu_trace_buffer();
3451 }
3452 EXPORT_SYMBOL_GPL(trace_array_init_printk);
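
/*
 * Illustrative sketch only (not part of the original file): how a subsystem
 * that owns its own tracing instance would typically combine the two exports
 * above.  The instance name "my_subsys" and this helper are hypothetical.
 */
static __maybe_unused int trace_array_printk_example(void)
{
        struct trace_array *tr;
        int ret;

        /* Look up (or create) the instance and take a reference on it */
        tr = trace_array_get_by_name("my_subsys");
        if (!tr)
                return -ENOMEM;

        /* Allocate the percpu buffers used by trace_array_printk() */
        ret = trace_array_init_printk(tr);
        if (!ret)
                /* Writes only into the "my_subsys" buffer, never the top level one */
                trace_array_printk(tr, _THIS_IP_, "example event: %d\n", 42);

        trace_array_put(tr);
        return ret;
}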
3453
3454 __printf(3, 4)
3455 int trace_array_printk_buf(struct trace_buffer *buffer,
3456                            unsigned long ip, const char *fmt, ...)
3457 {
3458         int ret;
3459         va_list ap;
3460
3461         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3462                 return 0;
3463
3464         va_start(ap, fmt);
3465         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3466         va_end(ap);
3467         return ret;
3468 }
3469
3470 __printf(2, 0)
3471 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3472 {
3473         return trace_array_vprintk(&global_trace, ip, fmt, args);
3474 }
3475 EXPORT_SYMBOL_GPL(trace_vprintk);
3476
3477 static void trace_iterator_increment(struct trace_iterator *iter)
3478 {
3479         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3480
3481         iter->idx++;
3482         if (buf_iter)
3483                 ring_buffer_iter_advance(buf_iter);
3484 }
3485
3486 static struct trace_entry *
3487 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3488                 unsigned long *lost_events)
3489 {
3490         struct ring_buffer_event *event;
3491         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3492
3493         if (buf_iter) {
3494                 event = ring_buffer_iter_peek(buf_iter, ts);
3495                 if (lost_events)
3496                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3497                                 (unsigned long)-1 : 0;
3498         } else {
3499                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3500                                          lost_events);
3501         }
3502
3503         if (event) {
3504                 iter->ent_size = ring_buffer_event_length(event);
3505                 return ring_buffer_event_data(event);
3506         }
3507         iter->ent_size = 0;
3508         return NULL;
3509 }
3510
3511 static struct trace_entry *
3512 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3513                   unsigned long *missing_events, u64 *ent_ts)
3514 {
3515         struct trace_buffer *buffer = iter->array_buffer->buffer;
3516         struct trace_entry *ent, *next = NULL;
3517         unsigned long lost_events = 0, next_lost = 0;
3518         int cpu_file = iter->cpu_file;
3519         u64 next_ts = 0, ts;
3520         int next_cpu = -1;
3521         int next_size = 0;
3522         int cpu;
3523
3524         /*
3525          * If we are in a per_cpu trace file, don't bother iterating over
3526          * all CPUs; just peek at that one directly.
3527          */
3528         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3529                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3530                         return NULL;
3531                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3532                 if (ent_cpu)
3533                         *ent_cpu = cpu_file;
3534
3535                 return ent;
3536         }
3537
3538         for_each_tracing_cpu(cpu) {
3539
3540                 if (ring_buffer_empty_cpu(buffer, cpu))
3541                         continue;
3542
3543                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3544
3545                 /*
3546                  * Pick the entry with the smallest timestamp:
3547                  */
3548                 if (ent && (!next || ts < next_ts)) {
3549                         next = ent;
3550                         next_cpu = cpu;
3551                         next_ts = ts;
3552                         next_lost = lost_events;
3553                         next_size = iter->ent_size;
3554                 }
3555         }
3556
3557         iter->ent_size = next_size;
3558
3559         if (ent_cpu)
3560                 *ent_cpu = next_cpu;
3561
3562         if (ent_ts)
3563                 *ent_ts = next_ts;
3564
3565         if (missing_events)
3566                 *missing_events = next_lost;
3567
3568         return next;
3569 }
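
/*
 * Worked example (illustrative): if CPU0's next entry carries ts=1005 and
 * CPU1's carries ts=1002, the loop above hands back CPU1's entry first; the
 * following call then compares CPU0's ts=1005 against whatever comes next on
 * CPU1, so the per-CPU buffers are merged into one stream in global
 * timestamp order.
 */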
3570
3571 #define STATIC_FMT_BUF_SIZE     128
3572 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3573
3574 static char *trace_iter_expand_format(struct trace_iterator *iter)
3575 {
3576         char *tmp;
3577
3578         if (iter->fmt == static_fmt_buf)
3579                 return NULL;
3580
3581         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3582                        GFP_KERNEL);
3583         if (tmp) {
3584                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3585                 iter->fmt = tmp;
3586         }
3587
3588         return tmp;
3589 }
3590
3591 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3592 {
3593         const char *p, *new_fmt;
3594         char *q;
3595
3596         if (WARN_ON_ONCE(!fmt))
3597                 return fmt;
3598
3599         p = fmt;
3600         new_fmt = q = iter->fmt;
3601         while (*p) {
3602                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3603                         if (!trace_iter_expand_format(iter))
3604                                 return fmt;
3605
3606                         q += iter->fmt - new_fmt;
3607                         new_fmt = iter->fmt;
3608                 }
3609
3610                 *q++ = *p++;
3611
3612                 /* Replace %p with %px */
3613                 if (p[-1] == '%') {
3614                         if (p[0] == '%') {
3615                                 *q++ = *p++;
3616                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3617                                 *q++ = *p++;
3618                                 *q++ = 'x';
3619                         }
3620                 }
3621         }
3622         *q = '\0';
3623
3624         return new_fmt;
3625 }
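
/*
 * Worked example (illustrative): with the rewrite above, an event format
 * string such as
 *
 *	"comm=%s ptr=%p func=%pS"
 *
 * becomes
 *
 *	"comm=%s ptr=%px func=%pS"
 *
 * i.e. a bare %p gains an 'x' so the real address is shown instead of a
 * hashed one, while %pS (and an escaped "%%p") are left untouched.
 */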
3626
3627 #define STATIC_TEMP_BUF_SIZE    128
3628 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3629
3630 /* Find the next real entry, without updating the iterator itself */
3631 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3632                                           int *ent_cpu, u64 *ent_ts)
3633 {
3634         /* __find_next_entry will reset ent_size */
3635         int ent_size = iter->ent_size;
3636         struct trace_entry *entry;
3637
3638         /*
3639          * If called from ftrace_dump(), then the iter->temp buffer
3640          * will be the static_temp_buf and not created from kmalloc.
3641          * If the entry size is greater than the buffer, we cannot
3642          * save it. Just return NULL in that case. This is only
3643          * used to add markers when two consecutive events' time
3644          * stamps have a large delta. See trace_print_lat_context().
3645          */
3646         if (iter->temp == static_temp_buf &&
3647             STATIC_TEMP_BUF_SIZE < ent_size)
3648                 return NULL;
3649
3650         /*
3651          * The __find_next_entry() may call peek_next_entry(), which may
3652          * call ring_buffer_peek() that may make the contents of iter->ent
3653          * undefined. Need to copy iter->ent now.
3654          */
3655         if (iter->ent && iter->ent != iter->temp) {
3656                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3657                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3658                         void *temp;
3659                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3660                         if (!temp)
3661                                 return NULL;
3662                         kfree(iter->temp);
3663                         iter->temp = temp;
3664                         iter->temp_size = iter->ent_size;
3665                 }
3666                 memcpy(iter->temp, iter->ent, iter->ent_size);
3667                 iter->ent = iter->temp;
3668         }
3669         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3670         /* Put back the original ent_size */
3671         iter->ent_size = ent_size;
3672
3673         return entry;
3674 }
3675
3676 /* Find the next real entry, and increment the iterator to the next entry */
3677 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3678 {
3679         iter->ent = __find_next_entry(iter, &iter->cpu,
3680                                       &iter->lost_events, &iter->ts);
3681
3682         if (iter->ent)
3683                 trace_iterator_increment(iter);
3684
3685         return iter->ent ? iter : NULL;
3686 }
3687
3688 static void trace_consume(struct trace_iterator *iter)
3689 {
3690         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3691                             &iter->lost_events);
3692 }
3693
3694 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3695 {
3696         struct trace_iterator *iter = m->private;
3697         int i = (int)*pos;
3698         void *ent;
3699
3700         WARN_ON_ONCE(iter->leftover);
3701
3702         (*pos)++;
3703
3704         /* can't go backwards */
3705         if (iter->idx > i)
3706                 return NULL;
3707
3708         if (iter->idx < 0)
3709                 ent = trace_find_next_entry_inc(iter);
3710         else
3711                 ent = iter;
3712
3713         while (ent && iter->idx < i)
3714                 ent = trace_find_next_entry_inc(iter);
3715
3716         iter->pos = *pos;
3717
3718         return ent;
3719 }
3720
3721 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3722 {
3723         struct ring_buffer_iter *buf_iter;
3724         unsigned long entries = 0;
3725         u64 ts;
3726
3727         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3728
3729         buf_iter = trace_buffer_iter(iter, cpu);
3730         if (!buf_iter)
3731                 return;
3732
3733         ring_buffer_iter_reset(buf_iter);
3734
3735         /*
3736          * With the max latency tracers, it is possible that a reset
3737          * never took place on a cpu. This is evident by the timestamp
3738          * being before the start of the buffer.
3739          */
3740         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3741                 if (ts >= iter->array_buffer->time_start)
3742                         break;
3743                 entries++;
3744                 ring_buffer_iter_advance(buf_iter);
3745         }
3746
3747         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3748 }
3749
3750 /*
3751  * The current tracer is copied to avoid taking a global lock
3752  * all around.
3753  */
3754 static void *s_start(struct seq_file *m, loff_t *pos)
3755 {
3756         struct trace_iterator *iter = m->private;
3757         struct trace_array *tr = iter->tr;
3758         int cpu_file = iter->cpu_file;
3759         void *p = NULL;
3760         loff_t l = 0;
3761         int cpu;
3762
3763         /*
3764          * Copy the tracer to avoid using a global lock all around.
3765          * iter->trace is a copy of current_trace; the name pointer
3766          * may be compared instead of using strcmp(), as iter->trace->name
3767          * will point to the same string as current_trace->name.
3768          */
3769         mutex_lock(&trace_types_lock);
3770         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3771                 /* Close iter->trace before switching to the new current tracer */
3772                 if (iter->trace->close)
3773                         iter->trace->close(iter);
3774                 *iter->trace = *tr->current_trace;
3775                 /* Reopen the new current tracer */
3776                 if (iter->trace->open)
3777                         iter->trace->open(iter);
3778         }
3779         mutex_unlock(&trace_types_lock);
3780
3781 #ifdef CONFIG_TRACER_MAX_TRACE
3782         if (iter->snapshot && iter->trace->use_max_tr)
3783                 return ERR_PTR(-EBUSY);
3784 #endif
3785
3786         if (*pos != iter->pos) {
3787                 iter->ent = NULL;
3788                 iter->cpu = 0;
3789                 iter->idx = -1;
3790
3791                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3792                         for_each_tracing_cpu(cpu)
3793                                 tracing_iter_reset(iter, cpu);
3794                 } else
3795                         tracing_iter_reset(iter, cpu_file);
3796
3797                 iter->leftover = 0;
3798                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3799                         ;
3800
3801         } else {
3802                 /*
3803                  * If we overflowed the seq_file before, then we want
3804                  * to just reuse the trace_seq buffer again.
3805                  */
3806                 if (iter->leftover)
3807                         p = iter;
3808                 else {
3809                         l = *pos - 1;
3810                         p = s_next(m, p, &l);
3811                 }
3812         }
3813
3814         trace_event_read_lock();
3815         trace_access_lock(cpu_file);
3816         return p;
3817 }
3818
3819 static void s_stop(struct seq_file *m, void *p)
3820 {
3821         struct trace_iterator *iter = m->private;
3822
3823 #ifdef CONFIG_TRACER_MAX_TRACE
3824         if (iter->snapshot && iter->trace->use_max_tr)
3825                 return;
3826 #endif
3827
3828         trace_access_unlock(iter->cpu_file);
3829         trace_event_read_unlock();
3830 }
3831
3832 static void
3833 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3834                       unsigned long *entries, int cpu)
3835 {
3836         unsigned long count;
3837
3838         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3839         /*
3840          * If this buffer has skipped entries, then we hold all
3841          * entries for the trace and we need to ignore the
3842          * ones recorded before the trace's start time stamp.
3843          */
3844         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3845                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3846                 /* total is the same as the entries */
3847                 *total = count;
3848         } else
3849                 *total = count +
3850                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3851         *entries = count;
3852 }
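
/*
 * Worked example (illustrative): with 900 entries still present in a CPU's
 * ring buffer, 100 entries overwritten (overrun) and no skipped entries,
 * this reports entries = 900 and total = 1000.  If the latency tracers had
 * instead marked 50 entries as skipped, both counts become 850 and the
 * overrun is not added in.
 */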
3853
3854 static void
3855 get_total_entries(struct array_buffer *buf,
3856                   unsigned long *total, unsigned long *entries)
3857 {
3858         unsigned long t, e;
3859         int cpu;
3860
3861         *total = 0;
3862         *entries = 0;
3863
3864         for_each_tracing_cpu(cpu) {
3865                 get_total_entries_cpu(buf, &t, &e, cpu);
3866                 *total += t;
3867                 *entries += e;
3868         }
3869 }
3870
3871 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3872 {
3873         unsigned long total, entries;
3874
3875         if (!tr)
3876                 tr = &global_trace;
3877
3878         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3879
3880         return entries;
3881 }
3882
3883 unsigned long trace_total_entries(struct trace_array *tr)
3884 {
3885         unsigned long total, entries;
3886
3887         if (!tr)
3888                 tr = &global_trace;
3889
3890         get_total_entries(&tr->array_buffer, &total, &entries);
3891
3892         return entries;
3893 }
3894
3895 static void print_lat_help_header(struct seq_file *m)
3896 {
3897         seq_puts(m, "#                    _------=> CPU#            \n"
3898                     "#                   / _-----=> irqs-off        \n"
3899                     "#                  | / _----=> need-resched    \n"
3900                     "#                  || / _---=> hardirq/softirq \n"
3901                     "#                  ||| / _--=> preempt-depth   \n"
3902                     "#                  |||| /     delay            \n"
3903                     "#  cmd     pid     ||||| time  |   caller      \n"
3904                     "#     \\   /        |||||  \\    |   /         \n");
3905 }
3906
3907 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3908 {
3909         unsigned long total;
3910         unsigned long entries;
3911
3912         get_total_entries(buf, &total, &entries);
3913         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3914                    entries, total, num_online_cpus());
3915         seq_puts(m, "#\n");
3916 }
3917
3918 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3919                                    unsigned int flags)
3920 {
3921         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3922
3923         print_event_info(buf, m);
3924
3925         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3926         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3927 }
3928
3929 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3930                                        unsigned int flags)
3931 {
3932         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3933         const char *space = "            ";
3934         int prec = tgid ? 12 : 2;
3935
3936         print_event_info(buf, m);
3937
3938         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3939         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3940         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3941         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3942         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3943         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3944         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3945 }
3946
3947 void
3948 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3949 {
3950         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3951         struct array_buffer *buf = iter->array_buffer;
3952         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3953         struct tracer *type = iter->trace;
3954         unsigned long entries;
3955         unsigned long total;
3956         const char *name;
3957
3958         name = type->name;
3959
3960         get_total_entries(buf, &total, &entries);
3961
3962         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3963                    name, UTS_RELEASE);
3964         seq_puts(m, "# -----------------------------------"
3965                  "---------------------------------\n");
3966         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3967                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3968                    nsecs_to_usecs(data->saved_latency),
3969                    entries,
3970                    total,
3971                    buf->cpu,
3972 #if defined(CONFIG_PREEMPT_NONE)
3973                    "server",
3974 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3975                    "desktop",
3976 #elif defined(CONFIG_PREEMPT)
3977                    "preempt",
3978 #elif defined(CONFIG_PREEMPT_RT)
3979                    "preempt_rt",
3980 #else
3981                    "unknown",
3982 #endif
3983                    /* These are reserved for later use */
3984                    0, 0, 0, 0);
3985 #ifdef CONFIG_SMP
3986         seq_printf(m, " #P:%d)\n", num_online_cpus());
3987 #else
3988         seq_puts(m, ")\n");
3989 #endif
3990         seq_puts(m, "#    -----------------\n");
3991         seq_printf(m, "#    | task: %.16s-%d "
3992                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3993                    data->comm, data->pid,
3994                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3995                    data->policy, data->rt_priority);
3996         seq_puts(m, "#    -----------------\n");
3997
3998         if (data->critical_start) {
3999                 seq_puts(m, "#  => started at: ");
4000                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4001                 trace_print_seq(m, &iter->seq);
4002                 seq_puts(m, "\n#  => ended at:   ");
4003                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4004                 trace_print_seq(m, &iter->seq);
4005                 seq_puts(m, "\n#\n");
4006         }
4007
4008         seq_puts(m, "#\n");
4009 }
4010
4011 static void test_cpu_buff_start(struct trace_iterator *iter)
4012 {
4013         struct trace_seq *s = &iter->seq;
4014         struct trace_array *tr = iter->tr;
4015
4016         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4017                 return;
4018
4019         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4020                 return;
4021
4022         if (cpumask_available(iter->started) &&
4023             cpumask_test_cpu(iter->cpu, iter->started))
4024                 return;
4025
4026         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4027                 return;
4028
4029         if (cpumask_available(iter->started))
4030                 cpumask_set_cpu(iter->cpu, iter->started);
4031
4032         /* Don't print started cpu buffer for the first entry of the trace */
4033         if (iter->idx > 1)
4034                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4035                                 iter->cpu);
4036 }
4037
4038 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4039 {
4040         struct trace_array *tr = iter->tr;
4041         struct trace_seq *s = &iter->seq;
4042         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4043         struct trace_entry *entry;
4044         struct trace_event *event;
4045
4046         entry = iter->ent;
4047
4048         test_cpu_buff_start(iter);
4049
4050         event = ftrace_find_event(entry->type);
4051
4052         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4053                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4054                         trace_print_lat_context(iter);
4055                 else
4056                         trace_print_context(iter);
4057         }
4058
4059         if (trace_seq_has_overflowed(s))
4060                 return TRACE_TYPE_PARTIAL_LINE;
4061
4062         if (event)
4063                 return event->funcs->trace(iter, sym_flags, event);
4064
4065         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4066
4067         return trace_handle_return(s);
4068 }
4069
4070 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4071 {
4072         struct trace_array *tr = iter->tr;
4073         struct trace_seq *s = &iter->seq;
4074         struct trace_entry *entry;
4075         struct trace_event *event;
4076
4077         entry = iter->ent;
4078
4079         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4080                 trace_seq_printf(s, "%d %d %llu ",
4081                                  entry->pid, iter->cpu, iter->ts);
4082
4083         if (trace_seq_has_overflowed(s))
4084                 return TRACE_TYPE_PARTIAL_LINE;
4085
4086         event = ftrace_find_event(entry->type);
4087         if (event)
4088                 return event->funcs->raw(iter, 0, event);
4089
4090         trace_seq_printf(s, "%d ?\n", entry->type);
4091
4092         return trace_handle_return(s);
4093 }
4094
4095 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4096 {
4097         struct trace_array *tr = iter->tr;
4098         struct trace_seq *s = &iter->seq;
4099         unsigned char newline = '\n';
4100         struct trace_entry *entry;
4101         struct trace_event *event;
4102
4103         entry = iter->ent;
4104
4105         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4106                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4107                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4108                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4109                 if (trace_seq_has_overflowed(s))
4110                         return TRACE_TYPE_PARTIAL_LINE;
4111         }
4112
4113         event = ftrace_find_event(entry->type);
4114         if (event) {
4115                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4116                 if (ret != TRACE_TYPE_HANDLED)
4117                         return ret;
4118         }
4119
4120         SEQ_PUT_FIELD(s, newline);
4121
4122         return trace_handle_return(s);
4123 }
4124
4125 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4126 {
4127         struct trace_array *tr = iter->tr;
4128         struct trace_seq *s = &iter->seq;
4129         struct trace_entry *entry;
4130         struct trace_event *event;
4131
4132         entry = iter->ent;
4133
4134         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4135                 SEQ_PUT_FIELD(s, entry->pid);
4136                 SEQ_PUT_FIELD(s, iter->cpu);
4137                 SEQ_PUT_FIELD(s, iter->ts);
4138                 if (trace_seq_has_overflowed(s))
4139                         return TRACE_TYPE_PARTIAL_LINE;
4140         }
4141
4142         event = ftrace_find_event(entry->type);
4143         return event ? event->funcs->binary(iter, 0, event) :
4144                 TRACE_TYPE_HANDLED;
4145 }
4146
4147 int trace_empty(struct trace_iterator *iter)
4148 {
4149         struct ring_buffer_iter *buf_iter;
4150         int cpu;
4151
4152         /* If we are looking at one CPU buffer, only check that one */
4153         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4154                 cpu = iter->cpu_file;
4155                 buf_iter = trace_buffer_iter(iter, cpu);
4156                 if (buf_iter) {
4157                         if (!ring_buffer_iter_empty(buf_iter))
4158                                 return 0;
4159                 } else {
4160                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4161                                 return 0;
4162                 }
4163                 return 1;
4164         }
4165
4166         for_each_tracing_cpu(cpu) {
4167                 buf_iter = trace_buffer_iter(iter, cpu);
4168                 if (buf_iter) {
4169                         if (!ring_buffer_iter_empty(buf_iter))
4170                                 return 0;
4171                 } else {
4172                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4173                                 return 0;
4174                 }
4175         }
4176
4177         return 1;
4178 }
4179
4180 /*  Called with trace_event_read_lock() held. */
4181 enum print_line_t print_trace_line(struct trace_iterator *iter)
4182 {
4183         struct trace_array *tr = iter->tr;
4184         unsigned long trace_flags = tr->trace_flags;
4185         enum print_line_t ret;
4186
4187         if (iter->lost_events) {
4188                 if (iter->lost_events == (unsigned long)-1)
4189                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4190                                          iter->cpu);
4191                 else
4192                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4193                                          iter->cpu, iter->lost_events);
4194                 if (trace_seq_has_overflowed(&iter->seq))
4195                         return TRACE_TYPE_PARTIAL_LINE;
4196         }
4197
4198         if (iter->trace && iter->trace->print_line) {
4199                 ret = iter->trace->print_line(iter);
4200                 if (ret != TRACE_TYPE_UNHANDLED)
4201                         return ret;
4202         }
4203
4204         if (iter->ent->type == TRACE_BPUTS &&
4205                         trace_flags & TRACE_ITER_PRINTK &&
4206                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4207                 return trace_print_bputs_msg_only(iter);
4208
4209         if (iter->ent->type == TRACE_BPRINT &&
4210                         trace_flags & TRACE_ITER_PRINTK &&
4211                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4212                 return trace_print_bprintk_msg_only(iter);
4213
4214         if (iter->ent->type == TRACE_PRINT &&
4215                         trace_flags & TRACE_ITER_PRINTK &&
4216                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4217                 return trace_print_printk_msg_only(iter);
4218
4219         if (trace_flags & TRACE_ITER_BIN)
4220                 return print_bin_fmt(iter);
4221
4222         if (trace_flags & TRACE_ITER_HEX)
4223                 return print_hex_fmt(iter);
4224
4225         if (trace_flags & TRACE_ITER_RAW)
4226                 return print_raw_fmt(iter);
4227
4228         return print_trace_fmt(iter);
4229 }
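
/*
 * Dispatch note (a sketch of the common cases, not taken from this file):
 * with the default trace options the human readable print_trace_fmt() path
 * is used; enabling the "bin", "hex" or "raw" options switches output to the
 * corresponding formatter above, and the "printk-msg-only" option (together
 * with "trace_printk") short-circuits trace_printk() style entries to just
 * their message text.
 */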
4230
4231 void trace_latency_header(struct seq_file *m)
4232 {
4233         struct trace_iterator *iter = m->private;
4234         struct trace_array *tr = iter->tr;
4235
4236         /* print nothing if the buffers are empty */
4237         if (trace_empty(iter))
4238                 return;
4239
4240         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4241                 print_trace_header(m, iter);
4242
4243         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4244                 print_lat_help_header(m);
4245 }
4246
4247 void trace_default_header(struct seq_file *m)
4248 {
4249         struct trace_iterator *iter = m->private;
4250         struct trace_array *tr = iter->tr;
4251         unsigned long trace_flags = tr->trace_flags;
4252
4253         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4254                 return;
4255
4256         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4257                 /* print nothing if the buffers are empty */
4258                 if (trace_empty(iter))
4259                         return;
4260                 print_trace_header(m, iter);
4261                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4262                         print_lat_help_header(m);
4263         } else {
4264                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4265                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4266                                 print_func_help_header_irq(iter->array_buffer,
4267                                                            m, trace_flags);
4268                         else
4269                                 print_func_help_header(iter->array_buffer, m,
4270                                                        trace_flags);
4271                 }
4272         }
4273 }
4274
4275 static void test_ftrace_alive(struct seq_file *m)
4276 {
4277         if (!ftrace_is_dead())
4278                 return;
4279         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4280                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4281 }
4282
4283 #ifdef CONFIG_TRACER_MAX_TRACE
4284 static void show_snapshot_main_help(struct seq_file *m)
4285 {
4286         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4287                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4288                     "#                      Takes a snapshot of the main buffer.\n"
4289                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4290                     "#                      (Doesn't have to be '2'; works with any number that\n"
4291                     "#                       is not a '0' or '1')\n");
4292 }
4293
4294 static void show_snapshot_percpu_help(struct seq_file *m)
4295 {
4296         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4297 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4298         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4299                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4300 #else
4301         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4302                     "#                     Must use main snapshot file to allocate.\n");
4303 #endif
4304         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4305                     "#                      (Doesn't have to be '2'; works with any number that\n"
4306                     "#                       is not a '0' or '1')\n");
4307 }
4308
4309 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4310 {
4311         if (iter->tr->allocated_snapshot)
4312                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4313         else
4314                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4315
4316         seq_puts(m, "# Snapshot commands:\n");
4317         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4318                 show_snapshot_main_help(m);
4319         else
4320                 show_snapshot_percpu_help(m);
4321 }
4322 #else
4323 /* Should never be called */
4324 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4325 #endif
4326
4327 static int s_show(struct seq_file *m, void *v)
4328 {
4329         struct trace_iterator *iter = v;
4330         int ret;
4331
4332         if (iter->ent == NULL) {
4333                 if (iter->tr) {
4334                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4335                         seq_puts(m, "#\n");
4336                         test_ftrace_alive(m);
4337                 }
4338                 if (iter->snapshot && trace_empty(iter))
4339                         print_snapshot_help(m, iter);
4340                 else if (iter->trace && iter->trace->print_header)
4341                         iter->trace->print_header(m);
4342                 else
4343                         trace_default_header(m);
4344
4345         } else if (iter->leftover) {
4346                 /*
4347                  * If we filled the seq_file buffer earlier, we
4348                  * want to just show it now.
4349                  */
4350                 ret = trace_print_seq(m, &iter->seq);
4351
4352                 /* ret should this time be zero, but you never know */
4353                 iter->leftover = ret;
4354
4355         } else {
4356                 ret = print_trace_line(iter);
4357                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4358                         iter->seq.full = 0;
4359                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4360                 }
4361                 ret = trace_print_seq(m, &iter->seq);
4362                 /*
4363                  * If we overflow the seq_file buffer, then it will
4364                  * ask us for this data again at start up.
4365                  * Use that instead.
4366                  *  ret is 0 if seq_file write succeeded.
4367                  *        -1 otherwise.
4368                  */
4369                 iter->leftover = ret;
4370         }
4371
4372         return 0;
4373 }
4374
4375 /*
4376  * Should be used after trace_array_get(), trace_types_lock
4377  * ensures that i_cdev was already initialized.
4378  */
4379 static inline int tracing_get_cpu(struct inode *inode)
4380 {
4381         if (inode->i_cdev) /* See trace_create_cpu_file() */
4382                 return (long)inode->i_cdev - 1;
4383         return RING_BUFFER_ALL_CPUS;
4384 }
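
/*
 * Example (illustrative): trace_create_cpu_file() stores cpu + 1 in i_cdev,
 * so i_cdev == 0 means "not a per-cpu file" and RING_BUFFER_ALL_CPUS is
 * returned, while e.g. i_cdev == 3 maps to the per_cpu/cpu2 files.
 */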
4385
4386 static const struct seq_operations tracer_seq_ops = {
4387         .start          = s_start,
4388         .next           = s_next,
4389         .stop           = s_stop,
4390         .show           = s_show,
4391 };
4392
4393 static struct trace_iterator *
4394 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4395 {
4396         struct trace_array *tr = inode->i_private;
4397         struct trace_iterator *iter;
4398         int cpu;
4399
4400         if (tracing_disabled)
4401                 return ERR_PTR(-ENODEV);
4402
4403         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4404         if (!iter)
4405                 return ERR_PTR(-ENOMEM);
4406
4407         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4408                                     GFP_KERNEL);
4409         if (!iter->buffer_iter)
4410                 goto release;
4411
4412         /*
4413          * trace_find_next_entry() may need to save off iter->ent.
4414          * It will place it into the iter->temp buffer. As most
4415          * events are less than 128 bytes, allocate a buffer of that size.
4416          * If one is greater, then trace_find_next_entry() will
4417          * allocate a new buffer to adjust for the bigger iter->ent.
4418          * It's not critical if it fails to get allocated here.
4419          */
4420         iter->temp = kmalloc(128, GFP_KERNEL);
4421         if (iter->temp)
4422                 iter->temp_size = 128;
4423
4424         /*
4425          * trace_event_printf() may need to modify the given format
4426          * string to replace %p with %px so that it shows the real address
4427          * instead of a hash value. However, that is only for event
4428          * tracing; other tracers may not need it. Defer the allocation
4429          * until it is needed.
4430          */
4431         iter->fmt = NULL;
4432         iter->fmt_size = 0;
4433
4434         /*
4435          * We make a copy of the current tracer to avoid concurrent
4436          * changes on it while we are reading.
4437          */
4438         mutex_lock(&trace_types_lock);
4439         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4440         if (!iter->trace)
4441                 goto fail;
4442
4443         *iter->trace = *tr->current_trace;
4444
4445         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4446                 goto fail;
4447
4448         iter->tr = tr;
4449
4450 #ifdef CONFIG_TRACER_MAX_TRACE
4451         /* Currently only the top directory has a snapshot */
4452         if (tr->current_trace->print_max || snapshot)
4453                 iter->array_buffer = &tr->max_buffer;
4454         else
4455 #endif
4456                 iter->array_buffer = &tr->array_buffer;
4457         iter->snapshot = snapshot;
4458         iter->pos = -1;
4459         iter->cpu_file = tracing_get_cpu(inode);
4460         mutex_init(&iter->mutex);
4461
4462         /* Notify the tracer early; before we stop tracing. */
4463         if (iter->trace->open)
4464                 iter->trace->open(iter);
4465
4466         /* Annotate start of buffers if we had overruns */
4467         if (ring_buffer_overruns(iter->array_buffer->buffer))
4468                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4469
4470         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4471         if (trace_clocks[tr->clock_id].in_ns)
4472                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4473
4474         /*
4475          * If pause-on-trace is enabled, then stop the trace while
4476          * dumping, unless this is the "snapshot" file
4477          */
4478         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4479                 tracing_stop_tr(tr);
4480
4481         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4482                 for_each_tracing_cpu(cpu) {
4483                         iter->buffer_iter[cpu] =
4484                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4485                                                          cpu, GFP_KERNEL);
4486                 }
4487                 ring_buffer_read_prepare_sync();
4488                 for_each_tracing_cpu(cpu) {
4489                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4490                         tracing_iter_reset(iter, cpu);
4491                 }
4492         } else {
4493                 cpu = iter->cpu_file;
4494                 iter->buffer_iter[cpu] =
4495                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4496                                                  cpu, GFP_KERNEL);
4497                 ring_buffer_read_prepare_sync();
4498                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4499                 tracing_iter_reset(iter, cpu);
4500         }
4501
4502         mutex_unlock(&trace_types_lock);
4503
4504         return iter;
4505
4506  fail:
4507         mutex_unlock(&trace_types_lock);
4508         kfree(iter->trace);
4509         kfree(iter->temp);
4510         kfree(iter->buffer_iter);
4511 release:
4512         seq_release_private(inode, file);
4513         return ERR_PTR(-ENOMEM);
4514 }
4515
4516 int tracing_open_generic(struct inode *inode, struct file *filp)
4517 {
4518         int ret;
4519
4520         ret = tracing_check_open_get_tr(NULL);
4521         if (ret)
4522                 return ret;
4523
4524         filp->private_data = inode->i_private;
4525         return 0;
4526 }
4527
4528 bool tracing_is_disabled(void)
4529 {
4530         return (tracing_disabled) ? true: false;
4531 }
4532
4533 /*
4534  * Open and update trace_array ref count.
4535  * Must have the current trace_array passed to it.
4536  */
4537 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4538 {
4539         struct trace_array *tr = inode->i_private;
4540         int ret;
4541
4542         ret = tracing_check_open_get_tr(tr);
4543         if (ret)
4544                 return ret;
4545
4546         filp->private_data = inode->i_private;
4547
4548         return 0;
4549 }
4550
4551 /*
4552  * The private pointer of the inode is the trace_event_file.
4553  * Update the tr ref count associated to it.
4554  */
4555 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4556 {
4557         struct trace_event_file *file = inode->i_private;
4558         int ret;
4559
4560         ret = tracing_check_open_get_tr(file->tr);
4561         if (ret)
4562                 return ret;
4563
4564         mutex_lock(&event_mutex);
4565
4566         /* Fail if the file is marked for removal */
4567         if (file->flags & EVENT_FILE_FL_FREED) {
4568                 trace_array_put(file->tr);
4569                 ret = -ENODEV;
4570         } else {
4571                 event_file_get(file);
4572         }
4573
4574         mutex_unlock(&event_mutex);
4575         if (ret)
4576                 return ret;
4577
4578         filp->private_data = inode->i_private;
4579
4580         return 0;
4581 }
4582
4583 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4584 {
4585         struct trace_event_file *file = inode->i_private;
4586
4587         trace_array_put(file->tr);
4588         event_file_put(file);
4589
4590         return 0;
4591 }
4592
4593 static int tracing_release(struct inode *inode, struct file *file)
4594 {
4595         struct trace_array *tr = inode->i_private;
4596         struct seq_file *m = file->private_data;
4597         struct trace_iterator *iter;
4598         int cpu;
4599
4600         if (!(file->f_mode & FMODE_READ)) {
4601                 trace_array_put(tr);
4602                 return 0;
4603         }
4604
4605         /* Writes do not use seq_file */
4606         iter = m->private;
4607         mutex_lock(&trace_types_lock);
4608
4609         for_each_tracing_cpu(cpu) {
4610                 if (iter->buffer_iter[cpu])
4611                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4612         }
4613
4614         if (iter->trace && iter->trace->close)
4615                 iter->trace->close(iter);
4616
4617         if (!iter->snapshot && tr->stop_count)
4618                 /* reenable tracing if it was previously enabled */
4619                 tracing_start_tr(tr);
4620
4621         __trace_array_put(tr);
4622
4623         mutex_unlock(&trace_types_lock);
4624
4625         mutex_destroy(&iter->mutex);
4626         free_cpumask_var(iter->started);
4627         kfree(iter->fmt);
4628         kfree(iter->temp);
4629         kfree(iter->trace);
4630         kfree(iter->buffer_iter);
4631         seq_release_private(inode, file);
4632
4633         return 0;
4634 }
4635
4636 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4637 {
4638         struct trace_array *tr = inode->i_private;
4639
4640         trace_array_put(tr);
4641         return 0;
4642 }
4643
4644 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4645 {
4646         struct trace_array *tr = inode->i_private;
4647
4648         trace_array_put(tr);
4649
4650         return single_release(inode, file);
4651 }
4652
4653 static int tracing_open(struct inode *inode, struct file *file)
4654 {
4655         struct trace_array *tr = inode->i_private;
4656         struct trace_iterator *iter;
4657         int ret;
4658
4659         ret = tracing_check_open_get_tr(tr);
4660         if (ret)
4661                 return ret;
4662
4663         /* If this file was open for write, then erase contents */
4664         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4665                 int cpu = tracing_get_cpu(inode);
4666                 struct array_buffer *trace_buf = &tr->array_buffer;
4667
4668 #ifdef CONFIG_TRACER_MAX_TRACE
4669                 if (tr->current_trace->print_max)
4670                         trace_buf = &tr->max_buffer;
4671 #endif
4672
4673                 if (cpu == RING_BUFFER_ALL_CPUS)
4674                         tracing_reset_online_cpus(trace_buf);
4675                 else
4676                         tracing_reset_cpu(trace_buf, cpu);
4677         }
4678
4679         if (file->f_mode & FMODE_READ) {
4680                 iter = __tracing_open(inode, file, false);
4681                 if (IS_ERR(iter))
4682                         ret = PTR_ERR(iter);
4683                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4684                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4685         }
4686
4687         if (ret < 0)
4688                 trace_array_put(tr);
4689
4690         return ret;
4691 }
4692
4693 /*
4694  * Some tracers are not suitable for instance buffers.
4695  * A tracer is always available for the global array (toplevel)
4696  * or if it explicitly states that it is.
4697  */
4698 static bool
4699 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4700 {
4701         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4702 }
4703
4704 /* Find the next tracer that this trace array may use */
4705 static struct tracer *
4706 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4707 {
4708         while (t && !trace_ok_for_array(t, tr))
4709                 t = t->next;
4710
4711         return t;
4712 }
4713
4714 static void *
4715 t_next(struct seq_file *m, void *v, loff_t *pos)
4716 {
4717         struct trace_array *tr = m->private;
4718         struct tracer *t = v;
4719
4720         (*pos)++;
4721
4722         if (t)
4723                 t = get_tracer_for_array(tr, t->next);
4724
4725         return t;
4726 }
4727
4728 static void *t_start(struct seq_file *m, loff_t *pos)
4729 {
4730         struct trace_array *tr = m->private;
4731         struct tracer *t;
4732         loff_t l = 0;
4733
4734         mutex_lock(&trace_types_lock);
4735
4736         t = get_tracer_for_array(tr, trace_types);
4737         for (; t && l < *pos; t = t_next(m, t, &l))
4738                         ;
4739
4740         return t;
4741 }
4742
4743 static void t_stop(struct seq_file *m, void *p)
4744 {
4745         mutex_unlock(&trace_types_lock);
4746 }
4747
4748 static int t_show(struct seq_file *m, void *v)
4749 {
4750         struct tracer *t = v;
4751
4752         if (!t)
4753                 return 0;
4754
4755         seq_puts(m, t->name);
4756         if (t->next)
4757                 seq_putc(m, ' ');
4758         else
4759                 seq_putc(m, '\n');
4760
4761         return 0;
4762 }
4763
4764 static const struct seq_operations show_traces_seq_ops = {
4765         .start          = t_start,
4766         .next           = t_next,
4767         .stop           = t_stop,
4768         .show           = t_show,
4769 };
4770
4771 static int show_traces_open(struct inode *inode, struct file *file)
4772 {
4773         struct trace_array *tr = inode->i_private;
4774         struct seq_file *m;
4775         int ret;
4776
4777         ret = tracing_check_open_get_tr(tr);
4778         if (ret)
4779                 return ret;
4780
4781         ret = seq_open(file, &show_traces_seq_ops);
4782         if (ret) {
4783                 trace_array_put(tr);
4784                 return ret;
4785         }
4786
4787         m = file->private_data;
4788         m->private = tr;
4789
4790         return 0;
4791 }
4792
4793 static int show_traces_release(struct inode *inode, struct file *file)
4794 {
4795         struct trace_array *tr = inode->i_private;
4796
4797         trace_array_put(tr);
4798         return seq_release(inode, file);
4799 }
4800
4801 static ssize_t
4802 tracing_write_stub(struct file *filp, const char __user *ubuf,
4803                    size_t count, loff_t *ppos)
4804 {
4805         return count;
4806 }
4807
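/*
 * Generic llseek for trace files: use seq_lseek() when the file was
 * opened for reading (and thus is backed by a seq_file), otherwise
 * simply reset the file position to zero.
 */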
4808 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4809 {
4810         int ret;
4811
4812         if (file->f_mode & FMODE_READ)
4813                 ret = seq_lseek(file, offset, whence);
4814         else
4815                 file->f_pos = ret = 0;
4816
4817         return ret;
4818 }
4819
4820 static const struct file_operations tracing_fops = {
4821         .open           = tracing_open,
4822         .read           = seq_read,
4823         .read_iter      = seq_read_iter,
4824         .splice_read    = generic_file_splice_read,
4825         .write          = tracing_write_stub,
4826         .llseek         = tracing_lseek,
4827         .release        = tracing_release,
4828 };
4829
4830 static const struct file_operations show_traces_fops = {
4831         .open           = show_traces_open,
4832         .read           = seq_read,
4833         .llseek         = seq_lseek,
4834         .release        = show_traces_release,
4835 };
4836
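/*
 * Read handler for "tracing_cpumask": format the mask of traced CPUs
 * as a cpumask string and copy it out to user space.
 */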
4837 static ssize_t
4838 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4839                      size_t count, loff_t *ppos)
4840 {
4841         struct trace_array *tr = file_inode(filp)->i_private;
4842         char *mask_str;
4843         int len;
4844
4845         len = snprintf(NULL, 0, "%*pb\n",
4846                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4847         mask_str = kmalloc(len, GFP_KERNEL);
4848         if (!mask_str)
4849                 return -ENOMEM;
4850
4851         len = snprintf(mask_str, len, "%*pb\n",
4852                        cpumask_pr_args(tr->tracing_cpumask));
4853         if (len >= count) {
4854                 count = -EINVAL;
4855                 goto out_err;
4856         }
4857         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4858
4859 out_err:
4860         kfree(mask_str);
4861
4862         return count;
4863 }
4864
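/*
 * Apply a new tracing cpumask to @tr: stop ring buffer recording on
 * CPUs that are being removed from the mask, re-enable it on CPUs that
 * are being added, then store the new mask.
 */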
4865 int tracing_set_cpumask(struct trace_array *tr,
4866                         cpumask_var_t tracing_cpumask_new)
4867 {
4868         int cpu;
4869
4870         if (!tr)
4871                 return -EINVAL;
4872
4873         local_irq_disable();
4874         arch_spin_lock(&tr->max_lock);
4875         for_each_tracing_cpu(cpu) {
4876                 /*
4877                  * Increase/decrease the disabled counter if we are
4878                  * about to flip a bit in the cpumask:
4879                  */
4880                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4881                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4882                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4883                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4884 #ifdef CONFIG_TRACER_MAX_TRACE
4885                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
4886 #endif
4887                 }
4888                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4889                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4890                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4891                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4892 #ifdef CONFIG_TRACER_MAX_TRACE
4893                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
4894 #endif
4895                 }
4896         }
4897         arch_spin_unlock(&tr->max_lock);
4898         local_irq_enable();
4899
4900         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4901
4902         return 0;
4903 }
4904
4905 static ssize_t
4906 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4907                       size_t count, loff_t *ppos)
4908 {
4909         struct trace_array *tr = file_inode(filp)->i_private;
4910         cpumask_var_t tracing_cpumask_new;
4911         int err;
4912
4913         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4914                 return -ENOMEM;
4915
4916         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4917         if (err)
4918                 goto err_free;
4919
4920         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4921         if (err)
4922                 goto err_free;
4923
4924         free_cpumask_var(tracing_cpumask_new);
4925
4926         return count;
4927
4928 err_free:
4929         free_cpumask_var(tracing_cpumask_new);
4930
4931         return err;
4932 }
4933
4934 static const struct file_operations tracing_cpumask_fops = {
4935         .open           = tracing_open_generic_tr,
4936         .read           = tracing_cpumask_read,
4937         .write          = tracing_cpumask_write,
4938         .release        = tracing_release_generic_tr,
4939         .llseek         = generic_file_llseek,
4940 };
4941
4942 static int tracing_trace_options_show(struct seq_file *m, void *v)
4943 {
4944         struct tracer_opt *trace_opts;
4945         struct trace_array *tr = m->private;
4946         u32 tracer_flags;
4947         int i;
4948
4949         mutex_lock(&trace_types_lock);
4950         tracer_flags = tr->current_trace->flags->val;
4951         trace_opts = tr->current_trace->flags->opts;
4952
4953         for (i = 0; trace_options[i]; i++) {
4954                 if (tr->trace_flags & (1 << i))
4955                         seq_printf(m, "%s\n", trace_options[i]);
4956                 else
4957                         seq_printf(m, "no%s\n", trace_options[i]);
4958         }
4959
4960         for (i = 0; trace_opts[i].name; i++) {
4961                 if (tracer_flags & trace_opts[i].bit)
4962                         seq_printf(m, "%s\n", trace_opts[i].name);
4963                 else
4964                         seq_printf(m, "no%s\n", trace_opts[i].name);
4965         }
4966         mutex_unlock(&trace_types_lock);
4967
4968         return 0;
4969 }
4970
4971 static int __set_tracer_option(struct trace_array *tr,
4972                                struct tracer_flags *tracer_flags,
4973                                struct tracer_opt *opts, int neg)
4974 {
4975         struct tracer *trace = tracer_flags->trace;
4976         int ret;
4977
4978         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4979         if (ret)
4980                 return ret;
4981
4982         if (neg)
4983                 tracer_flags->val &= ~opts->bit;
4984         else
4985                 tracer_flags->val |= opts->bit;
4986         return 0;
4987 }
4988
4989 /* Try to assign a tracer specific option */
4990 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4991 {
4992         struct tracer *trace = tr->current_trace;
4993         struct tracer_flags *tracer_flags = trace->flags;
4994         struct tracer_opt *opts = NULL;
4995         int i;
4996
4997         for (i = 0; tracer_flags->opts[i].name; i++) {
4998                 opts = &tracer_flags->opts[i];
4999
5000                 if (strcmp(cmp, opts->name) == 0)
5001                         return __set_tracer_option(tr, trace->flags, opts, neg);
5002         }
5003
5004         return -EINVAL;
5005 }
5006
5007 /* Some tracers require overwrite to stay enabled */
5008 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5009 {
5010         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5011                 return -1;
5012
5013         return 0;
5014 }
5015
5016 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5017 {
5018         int *map;
5019
5020         if ((mask == TRACE_ITER_RECORD_TGID) ||
5021             (mask == TRACE_ITER_RECORD_CMD))
5022                 lockdep_assert_held(&event_mutex);
5023
5024         /* do nothing if flag is already in the requested state */
5025         if (!!(tr->trace_flags & mask) == !!enabled)
5026                 return 0;
5027
5028         /* Give the tracer a chance to approve the change */
5029         if (tr->current_trace->flag_changed)
5030                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5031                         return -EINVAL;
5032
5033         if (enabled)
5034                 tr->trace_flags |= mask;
5035         else
5036                 tr->trace_flags &= ~mask;
5037
5038         if (mask == TRACE_ITER_RECORD_CMD)
5039                 trace_event_enable_cmd_record(enabled);
5040
5041         if (mask == TRACE_ITER_RECORD_TGID) {
5042                 if (!tgid_map) {
5043                         tgid_map_max = pid_max;
5044                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5045                                        GFP_KERNEL);
5046
5047                         /*
5048                          * Pairs with smp_load_acquire() in
5049                          * trace_find_tgid_ptr() to ensure that if it observes
5050                          * the tgid_map we just allocated then it also observes
5051                          * the corresponding tgid_map_max value.
5052                          */
5053                         smp_store_release(&tgid_map, map);
5054                 }
5055                 if (!tgid_map) {
5056                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5057                         return -ENOMEM;
5058                 }
5059
5060                 trace_event_enable_tgid_record(enabled);
5061         }
5062
5063         if (mask == TRACE_ITER_EVENT_FORK)
5064                 trace_event_follow_fork(tr, enabled);
5065
5066         if (mask == TRACE_ITER_FUNC_FORK)
5067                 ftrace_pid_follow_fork(tr, enabled);
5068
5069         if (mask == TRACE_ITER_OVERWRITE) {
5070                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5071 #ifdef CONFIG_TRACER_MAX_TRACE
5072                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5073 #endif
5074         }
5075
5076         if (mask == TRACE_ITER_PRINTK) {
5077                 trace_printk_start_stop_comm(enabled);
5078                 trace_printk_control(enabled);
5079         }
5080
5081         return 0;
5082 }
5083
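/*
 * Parse and apply a single option name: a leading "no" clears the
 * option. Generic trace options are matched first; anything else is
 * passed on to the current tracer's private options.
 */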
5084 int trace_set_options(struct trace_array *tr, char *option)
5085 {
5086         char *cmp;
5087         int neg = 0;
5088         int ret;
5089         size_t orig_len = strlen(option);
5090         int len;
5091
5092         cmp = strstrip(option);
5093
5094         len = str_has_prefix(cmp, "no");
5095         if (len)
5096                 neg = 1;
5097
5098         cmp += len;
5099
5100         mutex_lock(&event_mutex);
5101         mutex_lock(&trace_types_lock);
5102
5103         ret = match_string(trace_options, -1, cmp);
5104         /* If no option could be set, test the specific tracer options */
5105         if (ret < 0)
5106                 ret = set_tracer_option(tr, cmp, neg);
5107         else
5108                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5109
5110         mutex_unlock(&trace_types_lock);
5111         mutex_unlock(&event_mutex);
5112
5113         /*
5114          * If the first trailing whitespace is replaced with '\0' by strstrip,
5115          * turn it back into a space.
5116          */
5117         if (orig_len > strlen(option))
5118                 option[strlen(option)] = ' ';
5119
5120         return ret;
5121 }
5122
5123 static void __init apply_trace_boot_options(void)
5124 {
5125         char *buf = trace_boot_options_buf;
5126         char *option;
5127
5128         while (true) {
5129                 option = strsep(&buf, ",");
5130
5131                 if (!option)
5132                         break;
5133
5134                 if (*option)
5135                         trace_set_options(&global_trace, option);
5136
5137                 /* Put back the comma to allow this to be called again */
5138                 if (buf)
5139                         *(buf - 1) = ',';
5140         }
5141 }
5142
5143 static ssize_t
5144 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5145                         size_t cnt, loff_t *ppos)
5146 {
5147         struct seq_file *m = filp->private_data;
5148         struct trace_array *tr = m->private;
5149         char buf[64];
5150         int ret;
5151
5152         if (cnt >= sizeof(buf))
5153                 return -EINVAL;
5154
5155         if (copy_from_user(buf, ubuf, cnt))
5156                 return -EFAULT;
5157
5158         buf[cnt] = 0;
5159
5160         ret = trace_set_options(tr, buf);
5161         if (ret < 0)
5162                 return ret;
5163
5164         *ppos += cnt;
5165
5166         return cnt;
5167 }
5168
5169 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5170 {
5171         struct trace_array *tr = inode->i_private;
5172         int ret;
5173
5174         ret = tracing_check_open_get_tr(tr);
5175         if (ret)
5176                 return ret;
5177
5178         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5179         if (ret < 0)
5180                 trace_array_put(tr);
5181
5182         return ret;
5183 }
5184
5185 static const struct file_operations tracing_iter_fops = {
5186         .open           = tracing_trace_options_open,
5187         .read           = seq_read,
5188         .llseek         = seq_lseek,
5189         .release        = tracing_single_release_tr,
5190         .write          = tracing_trace_options_write,
5191 };
5192
5193 static const char readme_msg[] =
5194         "tracing mini-HOWTO:\n\n"
5195         "# echo 0 > tracing_on : quick way to disable tracing\n"
5196         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5197         " Important files:\n"
5198         "  trace\t\t\t- The static contents of the buffer\n"
5199         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5200         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5201         "  current_tracer\t- function and latency tracers\n"
5202         "  available_tracers\t- list of configured tracers for current_tracer\n"
5203         "  error_log\t- error log for failed commands (that support it)\n"
5204         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5205         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5206         "  trace_clock\t\t-change the clock used to order events\n"
5207         "       local:   Per cpu clock but may not be synced across CPUs\n"
5208         "      global:   Synced across CPUs but slows tracing down.\n"
5209         "     counter:   Not a clock, but just an increment\n"
5210         "      uptime:   Jiffy counter from time of boot\n"
5211         "        perf:   Same clock that perf events use\n"
5212 #ifdef CONFIG_X86_64
5213         "     x86-tsc:   TSC cycle counter\n"
5214 #endif
5215         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5216         "       delta:   Delta difference against a buffer-wide timestamp\n"
5217         "    absolute:   Absolute (standalone) timestamp\n"
5218         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5219         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5220         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5221         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5222         "\t\t\t  Remove sub-buffer with rmdir\n"
5223         "  trace_options\t\t- Set format or modify how tracing happens\n"
5224         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5225         "\t\t\t  option name\n"
5226         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5227 #ifdef CONFIG_DYNAMIC_FTRACE
5228         "\n  available_filter_functions - list of functions that can be filtered on\n"
5229         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5230         "\t\t\t  functions\n"
5231         "\t     accepts: func_full_name or glob-matching-pattern\n"
5232         "\t     modules: Can select a group via module\n"
5233         "\t      Format: :mod:<module-name>\n"
5234         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5235         "\t    triggers: a command to perform when function is hit\n"
5236         "\t      Format: <function>:<trigger>[:count]\n"
5237         "\t     trigger: traceon, traceoff\n"
5238         "\t\t      enable_event:<system>:<event>\n"
5239         "\t\t      disable_event:<system>:<event>\n"
5240 #ifdef CONFIG_STACKTRACE
5241         "\t\t      stacktrace\n"
5242 #endif
5243 #ifdef CONFIG_TRACER_SNAPSHOT
5244         "\t\t      snapshot\n"
5245 #endif
5246         "\t\t      dump\n"
5247         "\t\t      cpudump\n"
5248         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5249         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5250         "\t     The first one will disable tracing every time do_fault is hit\n"
5251         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5252         "\t       The first time do trap is hit and it disables tracing, the\n"
5253         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5254         "\t       the counter will not decrement. It only decrements when the\n"
5255         "\t       trigger did work\n"
5256         "\t     To remove trigger without count:\n"
5257         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5258         "\t     To remove trigger with a count:\n"
5259         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5260         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5261         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5262         "\t    modules: Can select a group via module command :mod:\n"
5263         "\t    Does not accept triggers\n"
5264 #endif /* CONFIG_DYNAMIC_FTRACE */
5265 #ifdef CONFIG_FUNCTION_TRACER
5266         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5267         "\t\t    (function)\n"
5268         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5269         "\t\t    (function)\n"
5270 #endif
5271 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5272         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5273         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5274         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5275 #endif
5276 #ifdef CONFIG_TRACER_SNAPSHOT
5277         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5278         "\t\t\t  snapshot buffer. Read the contents for more\n"
5279         "\t\t\t  information\n"
5280 #endif
5281 #ifdef CONFIG_STACK_TRACER
5282         "  stack_trace\t\t- Shows the max stack trace when active\n"
5283         "  stack_max_size\t- Shows current max stack size that was traced\n"
5284         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5285         "\t\t\t  new trace)\n"
5286 #ifdef CONFIG_DYNAMIC_FTRACE
5287         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5288         "\t\t\t  traces\n"
5289 #endif
5290 #endif /* CONFIG_STACK_TRACER */
5291 #ifdef CONFIG_DYNAMIC_EVENTS
5292         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5293         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5294 #endif
5295 #ifdef CONFIG_KPROBE_EVENTS
5296         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5297         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5298 #endif
5299 #ifdef CONFIG_UPROBE_EVENTS
5300         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5301         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5302 #endif
5303 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5304         "\t  accepts: event-definitions (one definition per line)\n"
5305         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5306         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5307 #ifdef CONFIG_HIST_TRIGGERS
5308         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5309 #endif
5310         "\t           -:[<group>/]<event>\n"
5311 #ifdef CONFIG_KPROBE_EVENTS
5312         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5313   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5314 #endif
5315 #ifdef CONFIG_UPROBE_EVENTS
5316   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5317 #endif
5318         "\t     args: <name>=fetcharg[:type]\n"
5319         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5320 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5321         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5322 #else
5323         "\t           $stack<index>, $stack, $retval, $comm,\n"
5324 #endif
5325         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5326         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5327         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5328         "\t           <type>\\[<array-size>\\]\n"
5329 #ifdef CONFIG_HIST_TRIGGERS
5330         "\t    field: <stype> <name>;\n"
5331         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5332         "\t           [unsigned] char/int/long\n"
5333 #endif
5334 #endif
5335         "  events/\t\t- Directory containing all trace event subsystems:\n"
5336         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5337         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5338         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5339         "\t\t\t  events\n"
5340         "      filter\t\t- If set, only events passing filter are traced\n"
5341         "  events/<system>/<event>/\t- Directory containing control files for\n"
5342         "\t\t\t  <event>:\n"
5343         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5344         "      filter\t\t- If set, only events passing filter are traced\n"
5345         "      trigger\t\t- If set, a command to perform when event is hit\n"
5346         "\t    Format: <trigger>[:count][if <filter>]\n"
5347         "\t   trigger: traceon, traceoff\n"
5348         "\t            enable_event:<system>:<event>\n"
5349         "\t            disable_event:<system>:<event>\n"
5350 #ifdef CONFIG_HIST_TRIGGERS
5351         "\t            enable_hist:<system>:<event>\n"
5352         "\t            disable_hist:<system>:<event>\n"
5353 #endif
5354 #ifdef CONFIG_STACKTRACE
5355         "\t\t    stacktrace\n"
5356 #endif
5357 #ifdef CONFIG_TRACER_SNAPSHOT
5358         "\t\t    snapshot\n"
5359 #endif
5360 #ifdef CONFIG_HIST_TRIGGERS
5361         "\t\t    hist (see below)\n"
5362 #endif
5363         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5364         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5365         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5366         "\t                  events/block/block_unplug/trigger\n"
5367         "\t   The first disables tracing every time block_unplug is hit.\n"
5368         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5369         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5370         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5371         "\t   Like function triggers, the counter is only decremented if it\n"
5372         "\t    enabled or disabled tracing.\n"
5373         "\t   To remove a trigger without a count:\n"
5374         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5375         "\t   To remove a trigger with a count:\n"
5376         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5377         "\t   Filters can be ignored when removing a trigger.\n"
5378 #ifdef CONFIG_HIST_TRIGGERS
5379         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5380         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5381         "\t            [:values=<field1[,field2,...]>]\n"
5382         "\t            [:sort=<field1[,field2,...]>]\n"
5383         "\t            [:size=#entries]\n"
5384         "\t            [:pause][:continue][:clear]\n"
5385         "\t            [:name=histname1]\n"
5386         "\t            [:<handler>.<action>]\n"
5387         "\t            [if <filter>]\n\n"
5388         "\t    Note, special fields can be used as well:\n"
5389         "\t            common_timestamp - to record current timestamp\n"
5390         "\t            common_cpu - to record the CPU the event happened on\n"
5391         "\n"
5392         "\t    When a matching event is hit, an entry is added to a hash\n"
5393         "\t    table using the key(s) and value(s) named, and the value of a\n"
5394         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5395         "\t    correspond to fields in the event's format description.  Keys\n"
5396         "\t    can be any field, or the special string 'stacktrace'.\n"
5397         "\t    Compound keys consisting of up to two fields can be specified\n"
5398         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5399         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5400         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5401         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5402         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5403         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5404         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5405         "\t    its histogram data will be shared with other triggers of the\n"
5406         "\t    same name, and trigger hits will update this common data.\n\n"
5407         "\t    Reading the 'hist' file for the event will dump the hash\n"
5408         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5409         "\t    triggers attached to an event, there will be a table for each\n"
5410         "\t    trigger in the output.  The table displayed for a named\n"
5411         "\t    trigger will be the same as any other instance having the\n"
5412         "\t    same name.  The default format used to display a given field\n"
5413         "\t    can be modified by appending any of the following modifiers\n"
5414         "\t    to the field name, as applicable:\n\n"
5415         "\t            .hex        display a number as a hex value\n"
5416         "\t            .sym        display an address as a symbol\n"
5417         "\t            .sym-offset display an address as a symbol and offset\n"
5418         "\t            .execname   display a common_pid as a program name\n"
5419         "\t            .syscall    display a syscall id as a syscall name\n"
5420         "\t            .log2       display log2 value rather than raw number\n"
5421         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5422         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5423         "\t    trigger or to start a hist trigger but not log any events\n"
5424         "\t    until told to do so.  'continue' can be used to start or\n"
5425         "\t    restart a paused hist trigger.\n\n"
5426         "\t    The 'clear' parameter will clear the contents of a running\n"
5427         "\t    hist trigger and leave its current paused/active state\n"
5428         "\t    unchanged.\n\n"
5429         "\t    The enable_hist and disable_hist triggers can be used to\n"
5430         "\t    have one event conditionally start and stop another event's\n"
5431         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5432         "\t    the enable_event and disable_event triggers.\n\n"
5433         "\t    Hist trigger handlers and actions are executed whenever a\n"
5434         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5435         "\t        <handler>.<action>\n\n"
5436         "\t    The available handlers are:\n\n"
5437         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5438         "\t        onmax(var)               - invoke if var exceeds current max\n"
5439         "\t        onchange(var)            - invoke action if var changes\n\n"
5440         "\t    The available actions are:\n\n"
5441         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5442         "\t        save(field,...)                      - save current event fields\n"
5443 #ifdef CONFIG_TRACER_SNAPSHOT
5444         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5445 #endif
5446 #ifdef CONFIG_SYNTH_EVENTS
5447         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5448         "\t  Write into this file to define/undefine new synthetic events.\n"
5449         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5450 #endif
5451 #endif
5452 ;
5453
5454 static ssize_t
5455 tracing_readme_read(struct file *filp, char __user *ubuf,
5456                        size_t cnt, loff_t *ppos)
5457 {
5458         return simple_read_from_buffer(ubuf, cnt, ppos,
5459                                         readme_msg, strlen(readme_msg));
5460 }
5461
5462 static const struct file_operations tracing_readme_fops = {
5463         .open           = tracing_open_generic,
5464         .read           = tracing_readme_read,
5465         .llseek         = generic_file_llseek,
5466 };
5467
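/*
 * seq_file iterator for "saved_tgids": the iterator position is a PID
 * and the value is the corresponding slot in tgid_map.
 */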
5468 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5469 {
5470         int pid = ++(*pos);
5471
5472         return trace_find_tgid_ptr(pid);
5473 }
5474
5475 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5476 {
5477         int pid = *pos;
5478
5479         return trace_find_tgid_ptr(pid);
5480 }
5481
5482 static void saved_tgids_stop(struct seq_file *m, void *v)
5483 {
5484 }
5485
5486 static int saved_tgids_show(struct seq_file *m, void *v)
5487 {
5488         int *entry = (int *)v;
5489         int pid = entry - tgid_map;
5490         int tgid = *entry;
5491
5492         if (tgid == 0)
5493                 return SEQ_SKIP;
5494
5495         seq_printf(m, "%d %d\n", pid, tgid);
5496         return 0;
5497 }
5498
5499 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5500         .start          = saved_tgids_start,
5501         .stop           = saved_tgids_stop,
5502         .next           = saved_tgids_next,
5503         .show           = saved_tgids_show,
5504 };
5505
5506 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5507 {
5508         int ret;
5509
5510         ret = tracing_check_open_get_tr(NULL);
5511         if (ret)
5512                 return ret;
5513
5514         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5515 }
5516
5517
5518 static const struct file_operations tracing_saved_tgids_fops = {
5519         .open           = tracing_saved_tgids_open,
5520         .read           = seq_read,
5521         .llseek         = seq_lseek,
5522         .release        = seq_release,
5523 };
5524
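/*
 * seq_file iterator for "saved_cmdlines": walk the map_cmdline_to_pid
 * array under trace_cmdline_lock, skipping unused slots.
 */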
5525 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5526 {
5527         unsigned int *ptr = v;
5528
5529         if (*pos || m->count)
5530                 ptr++;
5531
5532         (*pos)++;
5533
5534         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5535              ptr++) {
5536                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5537                         continue;
5538
5539                 return ptr;
5540         }
5541
5542         return NULL;
5543 }
5544
5545 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5546 {
5547         void *v;
5548         loff_t l = 0;
5549
5550         preempt_disable();
5551         arch_spin_lock(&trace_cmdline_lock);
5552
5553         v = &savedcmd->map_cmdline_to_pid[0];
5554         while (l <= *pos) {
5555                 v = saved_cmdlines_next(m, v, &l);
5556                 if (!v)
5557                         return NULL;
5558         }
5559
5560         return v;
5561 }
5562
5563 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5564 {
5565         arch_spin_unlock(&trace_cmdline_lock);
5566         preempt_enable();
5567 }
5568
5569 static int saved_cmdlines_show(struct seq_file *m, void *v)
5570 {
5571         char buf[TASK_COMM_LEN];
5572         unsigned int *pid = v;
5573
5574         __trace_find_cmdline(*pid, buf);
5575         seq_printf(m, "%d %s\n", *pid, buf);
5576         return 0;
5577 }
5578
5579 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5580         .start          = saved_cmdlines_start,
5581         .next           = saved_cmdlines_next,
5582         .stop           = saved_cmdlines_stop,
5583         .show           = saved_cmdlines_show,
5584 };
5585
5586 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5587 {
5588         int ret;
5589
5590         ret = tracing_check_open_get_tr(NULL);
5591         if (ret)
5592                 return ret;
5593
5594         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5595 }
5596
5597 static const struct file_operations tracing_saved_cmdlines_fops = {
5598         .open           = tracing_saved_cmdlines_open,
5599         .read           = seq_read,
5600         .llseek         = seq_lseek,
5601         .release        = seq_release,
5602 };
5603
5604 static ssize_t
5605 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5606                                  size_t cnt, loff_t *ppos)
5607 {
5608         char buf[64];
5609         int r;
5610
5611         preempt_disable();
5612         arch_spin_lock(&trace_cmdline_lock);
5613         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5614         arch_spin_unlock(&trace_cmdline_lock);
5615         preempt_enable();
5616
5617         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5618 }
5619
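/*
 * Swap in a newly allocated saved_cmdlines buffer with room for @val
 * entries and free the old one.
 */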
5620 static int tracing_resize_saved_cmdlines(unsigned int val)
5621 {
5622         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5623
5624         s = allocate_cmdlines_buffer(val);
5625         if (!s)
5626                 return -ENOMEM;
5627
5628         preempt_disable();
5629         arch_spin_lock(&trace_cmdline_lock);
5630         savedcmd_temp = savedcmd;
5631         savedcmd = s;
5632         arch_spin_unlock(&trace_cmdline_lock);
5633         preempt_enable();
5634         free_saved_cmdlines_buffer(savedcmd_temp);
5635
5636         return 0;
5637 }
5638
5639 static ssize_t
5640 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5641                                   size_t cnt, loff_t *ppos)
5642 {
5643         unsigned long val;
5644         int ret;
5645
5646         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5647         if (ret)
5648                 return ret;
5649
5650         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5651         if (!val || val > PID_MAX_DEFAULT)
5652                 return -EINVAL;
5653
5654         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5655         if (ret < 0)
5656                 return ret;
5657
5658         *ppos += cnt;
5659
5660         return cnt;
5661 }
5662
5663 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5664         .open           = tracing_open_generic,
5665         .read           = tracing_saved_cmdlines_size_read,
5666         .write          = tracing_saved_cmdlines_size_write,
5667 };
5668
5669 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
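/*
 * If @ptr landed on a head/tail marker (no eval_string), follow
 * tail.next to the next map array and step past its head item.
 * Returns NULL at the end of the list.
 */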
5670 static union trace_eval_map_item *
5671 update_eval_map(union trace_eval_map_item *ptr)
5672 {
5673         if (!ptr->map.eval_string) {
5674                 if (ptr->tail.next) {
5675                         ptr = ptr->tail.next;
5676                         /* Set ptr to the next real item (skip head) */
5677                         ptr++;
5678                 } else
5679                         return NULL;
5680         }
5681         return ptr;
5682 }
5683
5684 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5685 {
5686         union trace_eval_map_item *ptr = v;
5687
5688         /*
5689          * Paranoid! If ptr points to end, we don't want to increment past it.
5690          * This really should never happen.
5691          */
5692         (*pos)++;
5693         ptr = update_eval_map(ptr);
5694         if (WARN_ON_ONCE(!ptr))
5695                 return NULL;
5696
5697         ptr++;
5698         ptr = update_eval_map(ptr);
5699
5700         return ptr;
5701 }
5702
5703 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5704 {
5705         union trace_eval_map_item *v;
5706         loff_t l = 0;
5707
5708         mutex_lock(&trace_eval_mutex);
5709
5710         v = trace_eval_maps;
5711         if (v)
5712                 v++;
5713
5714         while (v && l < *pos) {
5715                 v = eval_map_next(m, v, &l);
5716         }
5717
5718         return v;
5719 }
5720
5721 static void eval_map_stop(struct seq_file *m, void *v)
5722 {
5723         mutex_unlock(&trace_eval_mutex);
5724 }
5725
5726 static int eval_map_show(struct seq_file *m, void *v)
5727 {
5728         union trace_eval_map_item *ptr = v;
5729
5730         seq_printf(m, "%s %ld (%s)\n",
5731                    ptr->map.eval_string, ptr->map.eval_value,
5732                    ptr->map.system);
5733
5734         return 0;
5735 }
5736
5737 static const struct seq_operations tracing_eval_map_seq_ops = {
5738         .start          = eval_map_start,
5739         .next           = eval_map_next,
5740         .stop           = eval_map_stop,
5741         .show           = eval_map_show,
5742 };
5743
5744 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5745 {
5746         int ret;
5747
5748         ret = tracing_check_open_get_tr(NULL);
5749         if (ret)
5750                 return ret;
5751
5752         return seq_open(filp, &tracing_eval_map_seq_ops);
5753 }
5754
5755 static const struct file_operations tracing_eval_map_fops = {
5756         .open           = tracing_eval_map_open,
5757         .read           = seq_read,
5758         .llseek         = seq_lseek,
5759         .release        = seq_release,
5760 };
5761
5762 static inline union trace_eval_map_item *
5763 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5764 {
5765         /* Return tail of array given the head */
5766         return ptr + ptr->head.length + 1;
5767 }
5768
5769 static void
5770 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5771                            int len)
5772 {
5773         struct trace_eval_map **stop;
5774         struct trace_eval_map **map;
5775         union trace_eval_map_item *map_array;
5776         union trace_eval_map_item *ptr;
5777
5778         stop = start + len;
5779
5780         /*
5781          * The trace_eval_maps contains the map plus a head and tail item,
5782          * where the head holds the module and length of array, and the
5783          * tail holds a pointer to the next list.
5784          */
5785         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5786         if (!map_array) {
5787                 pr_warn("Unable to allocate trace eval mapping\n");
5788                 return;
5789         }
5790
5791         mutex_lock(&trace_eval_mutex);
5792
5793         if (!trace_eval_maps)
5794                 trace_eval_maps = map_array;
5795         else {
5796                 ptr = trace_eval_maps;
5797                 for (;;) {
5798                         ptr = trace_eval_jmp_to_tail(ptr);
5799                         if (!ptr->tail.next)
5800                                 break;
5801                         ptr = ptr->tail.next;
5802
5803                 }
5804                 ptr->tail.next = map_array;
5805         }
5806         map_array->head.mod = mod;
5807         map_array->head.length = len;
5808         map_array++;
5809
5810         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5811                 map_array->map = **map;
5812                 map_array++;
5813         }
5814         memset(map_array, 0, sizeof(*map_array));
5815
5816         mutex_unlock(&trace_eval_mutex);
5817 }
5818
5819 static void trace_create_eval_file(struct dentry *d_tracer)
5820 {
5821         trace_create_file("eval_map", 0444, d_tracer,
5822                           NULL, &tracing_eval_map_fops);
5823 }
5824
5825 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5826 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5827 static inline void trace_insert_eval_map_file(struct module *mod,
5828                               struct trace_eval_map **start, int len) { }
5829 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5830
5831 static void trace_insert_eval_map(struct module *mod,
5832                                   struct trace_eval_map **start, int len)
5833 {
5834         struct trace_eval_map **map;
5835
5836         if (len <= 0)
5837                 return;
5838
5839         map = start;
5840
5841         trace_event_eval_update(map, len);
5842
5843         trace_insert_eval_map_file(mod, start, len);
5844 }
5845
5846 static ssize_t
5847 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5848                        size_t cnt, loff_t *ppos)
5849 {
5850         struct trace_array *tr = filp->private_data;
5851         char buf[MAX_TRACER_SIZE+2];
5852         int r;
5853
5854         mutex_lock(&trace_types_lock);
5855         r = sprintf(buf, "%s\n", tr->current_trace->name);
5856         mutex_unlock(&trace_types_lock);
5857
5858         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5859 }
5860
5861 int tracer_init(struct tracer *t, struct trace_array *tr)
5862 {
5863         tracing_reset_online_cpus(&tr->array_buffer);
5864         return t->init(tr);
5865 }
5866
5867 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5868 {
5869         int cpu;
5870
5871         for_each_tracing_cpu(cpu)
5872                 per_cpu_ptr(buf->data, cpu)->entries = val;
5873 }
5874
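/*
 * Refresh the cached per-CPU entry counts from ring_buffer_size()
 * after a resize, either for a single CPU or (using CPU 0's size)
 * for all of them.
 */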
5875 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5876 {
5877         if (cpu == RING_BUFFER_ALL_CPUS) {
5878                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5879         } else {
5880                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5881         }
5882 }
5883
5884 #ifdef CONFIG_TRACER_MAX_TRACE
5885 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5886 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5887                                         struct array_buffer *size_buf, int cpu_id)
5888 {
5889         int cpu, ret = 0;
5890
5891         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5892                 for_each_tracing_cpu(cpu) {
5893                         ret = ring_buffer_resize(trace_buf->buffer,
5894                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5895                         if (ret < 0)
5896                                 break;
5897                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5898                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5899                 }
5900         } else {
5901                 ret = ring_buffer_resize(trace_buf->buffer,
5902                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5903                 if (ret == 0)
5904                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5905                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5906         }
5907
5908         return ret;
5909 }
5910 #endif /* CONFIG_TRACER_MAX_TRACE */
5911
5912 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5913                                         unsigned long size, int cpu)
5914 {
5915         int ret;
5916
5917         /*
5918          * If kernel or user changes the size of the ring buffer
5919          * we use the size that was given, and we can forget about
5920          * expanding it later.
5921          */
5922         ring_buffer_expanded = true;
5923
5924         /* May be called before buffers are initialized */
5925         if (!tr->array_buffer.buffer)
5926                 return 0;
5927
5928         /* Do not allow tracing while resizing ring buffer */
5929         tracing_stop_tr(tr);
5930
5931         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5932         if (ret < 0)
5933                 goto out_start;
5934
5935 #ifdef CONFIG_TRACER_MAX_TRACE
5936         if (!tr->allocated_snapshot)
5937                 goto out;
5938
5939         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5940         if (ret < 0) {
5941                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5942                                                      &tr->array_buffer, cpu);
5943                 if (r < 0) {
5944                         /*
5945                          * AARGH! We are left with a different
5946                          * sized max buffer!!!!
5947                          * The max buffer is our "snapshot" buffer.
5948                          * When a tracer needs a snapshot (one of the
5949                          * latency tracers), it swaps the max buffer
5950                          * with the saved snapshot. We succeeded in
5951                          * updating the size of the main buffer, but failed to
5952                          * update the size of the max buffer. But when we tried
5953                          * to reset the main buffer to the original size, we
5954                          * failed there too. This is very unlikely to
5955                          * happen, but if it does, warn and kill all
5956                          * tracing.
5957                          */
5958                         WARN_ON(1);
5959                         tracing_disabled = 1;
5960                 }
5961                 goto out_start;
5962         }
5963
5964         update_buffer_entries(&tr->max_buffer, cpu);
5965
5966  out:
5967 #endif /* CONFIG_TRACER_MAX_TRACE */
5968
5969         update_buffer_entries(&tr->array_buffer, cpu);
5970  out_start:
5971         tracing_start_tr(tr);
5972         return ret;
5973 }
5974
5975 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5976                                   unsigned long size, int cpu_id)
5977 {
5978         int ret = size;
5979
5980         mutex_lock(&trace_types_lock);
5981
5982         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5983                 /* make sure this CPU is enabled in the mask */
5984                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5985                         ret = -EINVAL;
5986                         goto out;
5987                 }
5988         }
5989
5990         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5991         if (ret < 0)
5992                 ret = -ENOMEM;
5993
5994 out:
5995         mutex_unlock(&trace_types_lock);
5996
5997         return ret;
5998 }
5999
6000
6001 /**
6002  * tracing_update_buffers - used by tracing facility to expand ring buffers
6003  *
6004  * To save memory when tracing is never used on a system that has it
6005  * configured in, the ring buffers are set to a minimum size. But once
6006  * a user starts to use the tracing facility, they need to grow
6007  * to their default size.
6008  *
6009  * This function is to be called when a tracer is about to be used.
6010  */
6011 int tracing_update_buffers(void)
6012 {
6013         int ret = 0;
6014
6015         mutex_lock(&trace_types_lock);
6016         if (!ring_buffer_expanded)
6017                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6018                                                 RING_BUFFER_ALL_CPUS);
6019         mutex_unlock(&trace_types_lock);
6020
6021         return ret;
6022 }
6023
6024 struct trace_option_dentry;
6025
6026 static void
6027 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6028
6029 /*
6030  * Used to clear out the tracer before deletion of an instance.
6031  * Must have trace_types_lock held.
6032  */
6033 static void tracing_set_nop(struct trace_array *tr)
6034 {
6035         if (tr->current_trace == &nop_trace)
6036                 return;
6037
6038         tr->current_trace->enabled--;
6039
6040         if (tr->current_trace->reset)
6041                 tr->current_trace->reset(tr);
6042
6043         tr->current_trace = &nop_trace;
6044 }
6045
6046 static bool tracer_options_updated;
6047
6048 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6049 {
6050         /* Only enable if the directory has been created already. */
6051         if (!tr->dir)
6052                 return;
6053
6054         /* Only create trace option files after update_tracer_options finish */
6055         if (!tracer_options_updated)
6056                 return;
6057
6058         create_trace_option_files(tr, t);
6059 }
6060
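/*
 * Switch @tr to the tracer named @buf: tear down the current tracer,
 * handle the snapshot (max) buffer transition, and initialize the new
 * tracer. Fails if the name is unknown, the tracer is not allowed for
 * this array, or trace_pipe readers are active.
 */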
6061 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6062 {
6063         struct tracer *t;
6064 #ifdef CONFIG_TRACER_MAX_TRACE
6065         bool had_max_tr;
6066 #endif
6067         int ret = 0;
6068
6069         mutex_lock(&trace_types_lock);
6070
6071         if (!ring_buffer_expanded) {
6072                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6073                                                 RING_BUFFER_ALL_CPUS);
6074                 if (ret < 0)
6075                         goto out;
6076                 ret = 0;
6077         }
6078
6079         for (t = trace_types; t; t = t->next) {
6080                 if (strcmp(t->name, buf) == 0)
6081                         break;
6082         }
6083         if (!t) {
6084                 ret = -EINVAL;
6085                 goto out;
6086         }
6087         if (t == tr->current_trace)
6088                 goto out;
6089
6090 #ifdef CONFIG_TRACER_SNAPSHOT
6091         if (t->use_max_tr) {
6092                 local_irq_disable();
6093                 arch_spin_lock(&tr->max_lock);
6094                 if (tr->cond_snapshot)
6095                         ret = -EBUSY;
6096                 arch_spin_unlock(&tr->max_lock);
6097                 local_irq_enable();
6098                 if (ret)
6099                         goto out;
6100         }
6101 #endif
6102         /* Some tracers won't work on kernel command line */
6103         if (system_state < SYSTEM_RUNNING && t->noboot) {
6104                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6105                         t->name);
6106                 goto out;
6107         }
6108
6109         /* Some tracers are only allowed for the top level buffer */
6110         if (!trace_ok_for_array(t, tr)) {
6111                 ret = -EINVAL;
6112                 goto out;
6113         }
6114
6115         /* If trace pipe files are being read, we can't change the tracer */
6116         if (tr->trace_ref) {
6117                 ret = -EBUSY;
6118                 goto out;
6119         }
6120
6121         trace_branch_disable();
6122
6123         tr->current_trace->enabled--;
6124
6125         if (tr->current_trace->reset)
6126                 tr->current_trace->reset(tr);
6127
6128 #ifdef CONFIG_TRACER_MAX_TRACE
6129         had_max_tr = tr->current_trace->use_max_tr;
6130
6131         /* Current trace needs to be nop_trace before synchronize_rcu */
6132         tr->current_trace = &nop_trace;
6133
6134         if (had_max_tr && !t->use_max_tr) {
6135                 /*
6136                  * We need to make sure that the update_max_tr sees that
6137                  * current_trace changed to nop_trace to keep it from
6138                  * swapping the buffers after we resize it.
6139                  * update_max_tr() is called with interrupts disabled,
6140                  * so a synchronize_rcu() is sufficient.
6141                  */
6142                 synchronize_rcu();
6143                 free_snapshot(tr);
6144         }
6145
6146         if (t->use_max_tr && !tr->allocated_snapshot) {
6147                 ret = tracing_alloc_snapshot_instance(tr);
6148                 if (ret < 0)
6149                         goto out;
6150         }
6151 #else
6152         tr->current_trace = &nop_trace;
6153 #endif
6154
6155         if (t->init) {
6156                 ret = tracer_init(t, tr);
6157                 if (ret)
6158                         goto out;
6159         }
6160
6161         tr->current_trace = t;
6162         tr->current_trace->enabled++;
6163         trace_branch_enable(tr);
6164  out:
6165         mutex_unlock(&trace_types_lock);
6166
6167         return ret;
6168 }
6169
6170 static ssize_t
6171 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6172                         size_t cnt, loff_t *ppos)
6173 {
6174         struct trace_array *tr = filp->private_data;
6175         char buf[MAX_TRACER_SIZE+1];
6176         int i;
6177         size_t ret;
6178         int err;
6179
6180         ret = cnt;
6181
6182         if (cnt > MAX_TRACER_SIZE)
6183                 cnt = MAX_TRACER_SIZE;
6184
6185         if (copy_from_user(buf, ubuf, cnt))
6186                 return -EFAULT;
6187
6188         buf[cnt] = 0;
6189
6190         /* strip ending whitespace. */
6191         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6192                 buf[i] = 0;
6193
6194         err = tracing_set_tracer(tr, buf);
6195         if (err)
6196                 return err;
6197
6198         *ppos += ret;
6199
6200         return ret;
6201 }
6202
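/*
 * Helpers for the latency threshold/max files: values are stored in
 * nanoseconds but read and written in microseconds (a stored value of
 * -1 is printed as-is).
 */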
6203 static ssize_t
6204 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6205                    size_t cnt, loff_t *ppos)
6206 {
6207         char buf[64];
6208         int r;
6209
6210         r = snprintf(buf, sizeof(buf), "%ld\n",
6211                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6212         if (r > sizeof(buf))
6213                 r = sizeof(buf);
6214         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6215 }
6216
6217 static ssize_t
6218 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6219                     size_t cnt, loff_t *ppos)
6220 {
6221         unsigned long val;
6222         int ret;
6223
6224         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6225         if (ret)
6226                 return ret;
6227
6228         *ptr = val * 1000;
6229
6230         return cnt;
6231 }
6232
6233 static ssize_t
6234 tracing_thresh_read(struct file *filp, char __user *ubuf,
6235                     size_t cnt, loff_t *ppos)
6236 {
6237         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6238 }
6239
6240 static ssize_t
6241 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6242                      size_t cnt, loff_t *ppos)
6243 {
6244         struct trace_array *tr = filp->private_data;
6245         int ret;
6246
6247         mutex_lock(&trace_types_lock);
6248         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6249         if (ret < 0)
6250                 goto out;
6251
6252         if (tr->current_trace->update_thresh) {
6253                 ret = tr->current_trace->update_thresh(tr);
6254                 if (ret < 0)
6255                         goto out;
6256         }
6257
6258         ret = cnt;
6259 out:
6260         mutex_unlock(&trace_types_lock);
6261
6262         return ret;
6263 }
6264
6265 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6266
6267 static ssize_t
6268 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6269                      size_t cnt, loff_t *ppos)
6270 {
6271         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6272 }
6273
6274 static ssize_t
6275 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6276                       size_t cnt, loff_t *ppos)
6277 {
6278         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6279 }
6280
6281 #endif
6282
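/*
 * open_pipe_on_cpu()/close_pipe_on_cpu() track trace_pipe readers in
 * tr->pipe_cpumask so that each per-CPU pipe (or the all-CPU pipe) has
 * at most one reader at a time; a second open returns -EBUSY.
 */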
6283 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6284 {
6285         if (cpu == RING_BUFFER_ALL_CPUS) {
6286                 if (cpumask_empty(tr->pipe_cpumask)) {
6287                         cpumask_setall(tr->pipe_cpumask);
6288                         return 0;
6289                 }
6290         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6291                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6292                 return 0;
6293         }
6294         return -EBUSY;
6295 }
6296
6297 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6298 {
6299         if (cpu == RING_BUFFER_ALL_CPUS) {
6300                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6301                 cpumask_clear(tr->pipe_cpumask);
6302         } else {
6303                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6304                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6305         }
6306 }
6307
6308 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6309 {
6310         struct trace_array *tr = inode->i_private;
6311         struct trace_iterator *iter;
6312         int cpu;
6313         int ret;
6314
6315         ret = tracing_check_open_get_tr(tr);
6316         if (ret)
6317                 return ret;
6318
6319         mutex_lock(&trace_types_lock);
6320         cpu = tracing_get_cpu(inode);
6321         ret = open_pipe_on_cpu(tr, cpu);
6322         if (ret)
6323                 goto fail_pipe_on_cpu;
6324
6325         /* create a buffer to store the information to pass to userspace */
6326         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6327         if (!iter) {
6328                 ret = -ENOMEM;
6329                 goto fail_alloc_iter;
6330         }
6331
6332         trace_seq_init(&iter->seq);
6333         iter->trace = tr->current_trace;
6334
6335         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6336                 ret = -ENOMEM;
6337                 goto fail;
6338         }
6339
6340         /* trace pipe does not show start of buffer */
6341         cpumask_setall(iter->started);
6342
6343         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6344                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6345
6346         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6347         if (trace_clocks[tr->clock_id].in_ns)
6348                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6349
6350         iter->tr = tr;
6351         iter->array_buffer = &tr->array_buffer;
6352         iter->cpu_file = cpu;
6353         mutex_init(&iter->mutex);
6354         filp->private_data = iter;
6355
6356         if (iter->trace->pipe_open)
6357                 iter->trace->pipe_open(iter);
6358
6359         nonseekable_open(inode, filp);
6360
6361         tr->trace_ref++;
6362
6363         mutex_unlock(&trace_types_lock);
6364         return ret;
6365
6366 fail:
6367         kfree(iter);
6368 fail_alloc_iter:
6369         close_pipe_on_cpu(tr, cpu);
6370 fail_pipe_on_cpu:
6371         __trace_array_put(tr);
6372         mutex_unlock(&trace_types_lock);
6373         return ret;
6374 }
6375
6376 static int tracing_release_pipe(struct inode *inode, struct file *file)
6377 {
6378         struct trace_iterator *iter = file->private_data;
6379         struct trace_array *tr = inode->i_private;
6380
6381         mutex_lock(&trace_types_lock);
6382
6383         tr->trace_ref--;
6384
6385         if (iter->trace->pipe_close)
6386                 iter->trace->pipe_close(iter);
6387         close_pipe_on_cpu(tr, iter->cpu_file);
6388         mutex_unlock(&trace_types_lock);
6389
6390         free_cpumask_var(iter->started);
6391         kfree(iter->temp);
6392         mutex_destroy(&iter->mutex);
6393         kfree(iter);
6394
6395         trace_array_put(tr);
6396
6397         return 0;
6398 }
6399
6400 static __poll_t
6401 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6402 {
6403         struct trace_array *tr = iter->tr;
6404
6405         /* Iterators are static, they should be filled or empty */
6406         if (trace_buffer_iter(iter, iter->cpu_file))
6407                 return EPOLLIN | EPOLLRDNORM;
6408
6409         if (tr->trace_flags & TRACE_ITER_BLOCK)
6410                 /*
6411                  * Always select as readable when in blocking mode
6412                  */
6413                 return EPOLLIN | EPOLLRDNORM;
6414         else
6415                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6416                                              filp, poll_table, iter->tr->buffer_percent);
6417 }
6418
6419 static __poll_t
6420 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6421 {
6422         struct trace_iterator *iter = filp->private_data;
6423
6424         return trace_poll(iter, filp, poll_table);
6425 }
6426
6427 /* Must be called with iter->mutex held. */
6428 static int tracing_wait_pipe(struct file *filp)
6429 {
6430         struct trace_iterator *iter = filp->private_data;
6431         int ret;
6432
6433         while (trace_empty(iter)) {
6434
6435                 if ((filp->f_flags & O_NONBLOCK)) {
6436                         return -EAGAIN;
6437                 }
6438
6439                 /*
6440                  * We block until we read something, unless tracing gets disabled
6441                  * after we have already read something. We still block if tracing
6442                  * is disabled but nothing has been read yet: this lets a user cat
6443                  * this file and then enable tracing. Once something has been read,
6444                  * we give an EOF when tracing is disabled again.
6445                  *
6446                  * iter->pos will be 0 if we haven't read anything.
6447                  */
6448                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6449                         break;
6450
6451                 mutex_unlock(&iter->mutex);
6452
6453                 ret = wait_on_pipe(iter, 0);
6454
6455                 mutex_lock(&iter->mutex);
6456
6457                 if (ret)
6458                         return ret;
6459         }
6460
6461         return 1;
6462 }
6463
6464 /*
6465  * Consumer reader.
6466  */
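/*
 * Reads from trace_pipe are destructive: every entry copied to user
 * space is consumed from the ring buffer. This is what, for example,
 * a blocking "cat /sys/kernel/tracing/trace_pipe" ends up calling
 * (path assumes tracefs is mounted at /sys/kernel/tracing).
 */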
6467 static ssize_t
6468 tracing_read_pipe(struct file *filp, char __user *ubuf,
6469                   size_t cnt, loff_t *ppos)
6470 {
6471         struct trace_iterator *iter = filp->private_data;
6472         ssize_t sret;
6473
6474         /*
6475          * Avoid more than one consumer on a single file descriptor.
6476          * This is just a matter of trace coherency; the ring buffer itself
6477          * is protected.
6478          */
6479         mutex_lock(&iter->mutex);
6480
6481         /* return any leftover data */
6482         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6483         if (sret != -EBUSY)
6484                 goto out;
6485
6486         trace_seq_init(&iter->seq);
6487
6488         if (iter->trace->read) {
6489                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6490                 if (sret)
6491                         goto out;
6492         }
6493
6494 waitagain:
6495         sret = tracing_wait_pipe(filp);
6496         if (sret <= 0)
6497                 goto out;
6498
6499         /* stop when tracing is finished */
6500         if (trace_empty(iter)) {
6501                 sret = 0;
6502                 goto out;
6503         }
6504
6505         if (cnt >= PAGE_SIZE)
6506                 cnt = PAGE_SIZE - 1;
6507
6508         /* reset all but tr, trace, and overruns */
6509         memset(&iter->seq, 0,
6510                sizeof(struct trace_iterator) -
6511                offsetof(struct trace_iterator, seq));
6512         cpumask_clear(iter->started);
6513         trace_seq_init(&iter->seq);
6514         iter->pos = -1;
6515
6516         trace_event_read_lock();
6517         trace_access_lock(iter->cpu_file);
6518         while (trace_find_next_entry_inc(iter) != NULL) {
6519                 enum print_line_t ret;
6520                 int save_len = iter->seq.seq.len;
6521
6522                 ret = print_trace_line(iter);
6523                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6524                         /*
6525                          * If one print_trace_line() fills the entire trace_seq in one shot,
6526                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6527                          * In this case, we need to consume it, otherwise the loop will peek
6528                          * at this event again next time, resulting in an infinite loop.
6529                          */
6530                         if (save_len == 0) {
6531                                 iter->seq.full = 0;
6532                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6533                                 trace_consume(iter);
6534                                 break;
6535                         }
6536
6537                         /* In other cases, don't print partial lines */
6538                         iter->seq.seq.len = save_len;
6539                         break;
6540                 }
6541                 if (ret != TRACE_TYPE_NO_CONSUME)
6542                         trace_consume(iter);
6543
6544                 if (trace_seq_used(&iter->seq) >= cnt)
6545                         break;
6546
6547                 /*
6548                  * Setting the full flag means we reached the trace_seq buffer
6549                  * size and we should have left via the partial-output condition
6550                  * above. One of the trace_seq_* functions is not being used properly.
6551                  */
6552                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6553                           iter->ent->type);
6554         }
6555         trace_access_unlock(iter->cpu_file);
6556         trace_event_read_unlock();
6557
6558         /* Now copy what we have to the user */
6559         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6560         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6561                 trace_seq_init(&iter->seq);
6562
6563         /*
6564          * If there was nothing to send to user, in spite of consuming trace
6565          * entries, go back to wait for more entries.
6566          */
6567         if (sret == -EBUSY)
6568                 goto waitagain;
6569
6570 out:
6571         mutex_unlock(&iter->mutex);
6572
6573         return sret;
6574 }
6575
6576 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6577                                      unsigned int idx)
6578 {
6579         __free_page(spd->pages[idx]);
6580 }
6581
6582 static size_t
6583 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6584 {
6585         size_t count;
6586         int save_len;
6587         int ret;
6588
6589         /* Seq buffer is page-sized, exactly what we need. */
6590         for (;;) {
6591                 save_len = iter->seq.seq.len;
6592                 ret = print_trace_line(iter);
6593
6594                 if (trace_seq_has_overflowed(&iter->seq)) {
6595                         iter->seq.seq.len = save_len;
6596                         break;
6597                 }
6598
6599                 /*
6600                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6601                  * should only be returned if iter->seq overflowed. But check
6602                  * it anyway to be safe.
6603                  */
6604                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6605                         iter->seq.seq.len = save_len;
6606                         break;
6607                 }
6608
6609                 count = trace_seq_used(&iter->seq) - save_len;
6610                 if (rem < count) {
6611                         rem = 0;
6612                         iter->seq.seq.len = save_len;
6613                         break;
6614                 }
6615
6616                 if (ret != TRACE_TYPE_NO_CONSUME)
6617                         trace_consume(iter);
6618                 rem -= count;
6619                 if (!trace_find_next_entry_inc(iter))   {
6620                         rem = 0;
6621                         iter->ent = NULL;
6622                         break;
6623                 }
6624         }
6625
6626         return rem;
6627 }
6628
6629 static ssize_t tracing_splice_read_pipe(struct file *filp,
6630                                         loff_t *ppos,
6631                                         struct pipe_inode_info *pipe,
6632                                         size_t len,
6633                                         unsigned int flags)
6634 {
6635         struct page *pages_def[PIPE_DEF_BUFFERS];
6636         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6637         struct trace_iterator *iter = filp->private_data;
6638         struct splice_pipe_desc spd = {
6639                 .pages          = pages_def,
6640                 .partial        = partial_def,
6641                 .nr_pages       = 0, /* This gets updated below. */
6642                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6643                 .ops            = &default_pipe_buf_ops,
6644                 .spd_release    = tracing_spd_release_pipe,
6645         };
6646         ssize_t ret;
6647         size_t rem;
6648         unsigned int i;
6649
6650         if (splice_grow_spd(pipe, &spd))
6651                 return -ENOMEM;
6652
6653         mutex_lock(&iter->mutex);
6654
6655         if (iter->trace->splice_read) {
6656                 ret = iter->trace->splice_read(iter, filp,
6657                                                ppos, pipe, len, flags);
6658                 if (ret)
6659                         goto out_err;
6660         }
6661
6662         ret = tracing_wait_pipe(filp);
6663         if (ret <= 0)
6664                 goto out_err;
6665
6666         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6667                 ret = -EFAULT;
6668                 goto out_err;
6669         }
6670
6671         trace_event_read_lock();
6672         trace_access_lock(iter->cpu_file);
6673
6674         /* Fill as many pages as possible. */
6675         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6676                 spd.pages[i] = alloc_page(GFP_KERNEL);
6677                 if (!spd.pages[i])
6678                         break;
6679
6680                 rem = tracing_fill_pipe_page(rem, iter);
6681
6682                 /* Copy the data into the page, so we can start over. */
6683                 ret = trace_seq_to_buffer(&iter->seq,
6684                                           page_address(spd.pages[i]),
6685                                           trace_seq_used(&iter->seq));
6686                 if (ret < 0) {
6687                         __free_page(spd.pages[i]);
6688                         break;
6689                 }
6690                 spd.partial[i].offset = 0;
6691                 spd.partial[i].len = trace_seq_used(&iter->seq);
6692
6693                 trace_seq_init(&iter->seq);
6694         }
6695
6696         trace_access_unlock(iter->cpu_file);
6697         trace_event_read_unlock();
6698         mutex_unlock(&iter->mutex);
6699
6700         spd.nr_pages = i;
6701
6702         if (i)
6703                 ret = splice_to_pipe(pipe, &spd);
6704         else
6705                 ret = 0;
6706 out:
6707         splice_shrink_spd(&spd);
6708         return ret;
6709
6710 out_err:
6711         mutex_unlock(&iter->mutex);
6712         goto out;
6713 }
6714
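/*
 * tracing_entries_read() backs the "buffer_size_kb" files. Sizes are
 * reported in KB; the instance-wide file prints "X" when the per-CPU
 * buffers are not all the same size, and appends "(expanded: N)" while
 * the ring buffer is still at its boot-time minimum size.
 */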
6715 static ssize_t
6716 tracing_entries_read(struct file *filp, char __user *ubuf,
6717                      size_t cnt, loff_t *ppos)
6718 {
6719         struct inode *inode = file_inode(filp);
6720         struct trace_array *tr = inode->i_private;
6721         int cpu = tracing_get_cpu(inode);
6722         char buf[64];
6723         int r = 0;
6724         ssize_t ret;
6725
6726         mutex_lock(&trace_types_lock);
6727
6728         if (cpu == RING_BUFFER_ALL_CPUS) {
6729                 int cpu, buf_size_same;
6730                 unsigned long size;
6731
6732                 size = 0;
6733                 buf_size_same = 1;
6734                 /* check if all per-cpu buffer sizes are the same */
6735                 for_each_tracing_cpu(cpu) {
6736                         /* fill in the size from first enabled cpu */
6737                         if (size == 0)
6738                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6739                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6740                                 buf_size_same = 0;
6741                                 break;
6742                         }
6743                 }
6744
6745                 if (buf_size_same) {
6746                         if (!ring_buffer_expanded)
6747                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6748                                             size >> 10,
6749                                             trace_buf_size >> 10);
6750                         else
6751                                 r = sprintf(buf, "%lu\n", size >> 10);
6752                 } else
6753                         r = sprintf(buf, "X\n");
6754         } else
6755                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6756
6757         mutex_unlock(&trace_types_lock);
6758
6759         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6760         return ret;
6761 }
6762
6763 static ssize_t
6764 tracing_entries_write(struct file *filp, const char __user *ubuf,
6765                       size_t cnt, loff_t *ppos)
6766 {
6767         struct inode *inode = file_inode(filp);
6768         struct trace_array *tr = inode->i_private;
6769         unsigned long val;
6770         int ret;
6771
6772         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6773         if (ret)
6774                 return ret;
6775
6776         /* must have at least 1 entry */
6777         if (!val)
6778                 return -EINVAL;
6779
6780         /* value is in KB */
6781         val <<= 10;
6782         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6783         if (ret < 0)
6784                 return ret;
6785
6786         *ppos += cnt;
6787
6788         return cnt;
6789 }
6790
6791 static ssize_t
6792 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6793                                 size_t cnt, loff_t *ppos)
6794 {
6795         struct trace_array *tr = filp->private_data;
6796         char buf[64];
6797         int r, cpu;
6798         unsigned long size = 0, expanded_size = 0;
6799
6800         mutex_lock(&trace_types_lock);
6801         for_each_tracing_cpu(cpu) {
6802                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6803                 if (!ring_buffer_expanded)
6804                         expanded_size += trace_buf_size >> 10;
6805         }
6806         if (ring_buffer_expanded)
6807                 r = sprintf(buf, "%lu\n", size);
6808         else
6809                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6810         mutex_unlock(&trace_types_lock);
6811
6812         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6813 }
6814
6815 static ssize_t
6816 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6817                           size_t cnt, loff_t *ppos)
6818 {
6819         /*
6820          * There is no need to read what the user has written; this function
6821          * only exists so that using "echo" on this file does not produce an error.
6822          */
6823
6824         *ppos += cnt;
6825
6826         return cnt;
6827 }
6828
6829 static int
6830 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6831 {
6832         struct trace_array *tr = inode->i_private;
6833
6834         /* disable tracing? */
6835         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6836                 tracer_tracing_off(tr);
6837         /* resize the ring buffer to 0 */
6838         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6839
6840         trace_array_put(tr);
6841
6842         return 0;
6843 }
6844
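/*
 * tracing_mark_write() backs the "trace_marker" file: the written string
 * is recorded as a TRACE_PRINT event, with a trailing newline added if
 * missing and "<faulted>" substituted if the user page cannot be copied
 * atomically. A usage sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */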
6845 static ssize_t
6846 tracing_mark_write(struct file *filp, const char __user *ubuf,
6847                                         size_t cnt, loff_t *fpos)
6848 {
6849         struct trace_array *tr = filp->private_data;
6850         struct ring_buffer_event *event;
6851         enum event_trigger_type tt = ETT_NONE;
6852         struct trace_buffer *buffer;
6853         struct print_entry *entry;
6854         unsigned long irq_flags;
6855         ssize_t written;
6856         int size;
6857         int len;
6858
6859 /* Used in tracing_mark_raw_write() as well */
6860 #define FAULTED_STR "<faulted>"
6861 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6862
6863         if (tracing_disabled)
6864                 return -EINVAL;
6865
6866         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6867                 return -EINVAL;
6868
6869         if (cnt > TRACE_BUF_SIZE)
6870                 cnt = TRACE_BUF_SIZE;
6871
6872         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6873
6874         local_save_flags(irq_flags);
6875         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6876
6877         /* If less than "<faulted>", then make sure we can still add that */
6878         if (cnt < FAULTED_SIZE)
6879                 size += FAULTED_SIZE - cnt;
6880
6881         buffer = tr->array_buffer.buffer;
6882         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6883                                             irq_flags, preempt_count());
6884         if (unlikely(!event))
6885                 /* Ring buffer disabled, return as if not open for write */
6886                 return -EBADF;
6887
6888         entry = ring_buffer_event_data(event);
6889         entry->ip = _THIS_IP_;
6890
6891         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6892         if (len) {
6893                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6894                 cnt = FAULTED_SIZE;
6895                 written = -EFAULT;
6896         } else
6897                 written = cnt;
6898
6899         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6900                 /* do not add \n before testing triggers, but add \0 */
6901                 entry->buf[cnt] = '\0';
6902                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6903         }
6904
6905         if (entry->buf[cnt - 1] != '\n') {
6906                 entry->buf[cnt] = '\n';
6907                 entry->buf[cnt + 1] = '\0';
6908         } else
6909                 entry->buf[cnt] = '\0';
6910
6911         if (static_branch_unlikely(&trace_marker_exports_enabled))
6912                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6913         __buffer_unlock_commit(buffer, event);
6914
6915         if (tt)
6916                 event_triggers_post_call(tr->trace_marker_file, tt);
6917
6918         if (written > 0)
6919                 *fpos += written;
6920
6921         return written;
6922 }
6923
6924 /* Limit it for now to 3K (including tag) */
6925 #define RAW_DATA_MAX_SIZE (1024*3)
6926
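/*
 * tracing_mark_raw_write() backs the "trace_marker_raw" file: the payload
 * is binary and must begin with an integer tag id (copied into entry->id),
 * followed by raw data, with the total limited to RAW_DATA_MAX_SIZE.
 */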
6927 static ssize_t
6928 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6929                                         size_t cnt, loff_t *fpos)
6930 {
6931         struct trace_array *tr = filp->private_data;
6932         struct ring_buffer_event *event;
6933         struct trace_buffer *buffer;
6934         struct raw_data_entry *entry;
6935         unsigned long irq_flags;
6936         ssize_t written;
6937         int size;
6938         int len;
6939
6940 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6941
6942         if (tracing_disabled)
6943                 return -EINVAL;
6944
6945         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6946                 return -EINVAL;
6947
6948         /* The marker must at least have a tag id */
6949         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6950                 return -EINVAL;
6951
6952         if (cnt > TRACE_BUF_SIZE)
6953                 cnt = TRACE_BUF_SIZE;
6954
6955         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6956
6957         local_save_flags(irq_flags);
6958         size = sizeof(*entry) + cnt;
6959         if (cnt < FAULT_SIZE_ID)
6960                 size += FAULT_SIZE_ID - cnt;
6961
6962         buffer = tr->array_buffer.buffer;
6963         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6964                                             irq_flags, preempt_count());
6965         if (!event)
6966                 /* Ring buffer disabled, return as if not open for write */
6967                 return -EBADF;
6968
6969         entry = ring_buffer_event_data(event);
6970
6971         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6972         if (len) {
6973                 entry->id = -1;
6974                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6975                 written = -EFAULT;
6976         } else
6977                 written = cnt;
6978
6979         __buffer_unlock_commit(buffer, event);
6980
6981         if (written > 0)
6982                 *fpos += written;
6983
6984         return written;
6985 }
6986
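/*
 * The "trace_clock" file: reading lists the available clocks with the
 * current one in brackets; writing a clock name switches to it and
 * resets the buffers, since timestamps from different clocks are not
 * comparable. A usage sketch, assuming tracefs at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 */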
6987 static int tracing_clock_show(struct seq_file *m, void *v)
6988 {
6989         struct trace_array *tr = m->private;
6990         int i;
6991
6992         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6993                 seq_printf(m,
6994                         "%s%s%s%s", i ? " " : "",
6995                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6996                         i == tr->clock_id ? "]" : "");
6997         seq_putc(m, '\n');
6998
6999         return 0;
7000 }
7001
7002 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7003 {
7004         int i;
7005
7006         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7007                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7008                         break;
7009         }
7010         if (i == ARRAY_SIZE(trace_clocks))
7011                 return -EINVAL;
7012
7013         mutex_lock(&trace_types_lock);
7014
7015         tr->clock_id = i;
7016
7017         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7018
7019         /*
7020          * New clock may not be consistent with the previous clock.
7021          * Reset the buffer so that it doesn't have incomparable timestamps.
7022          */
7023         tracing_reset_online_cpus(&tr->array_buffer);
7024
7025 #ifdef CONFIG_TRACER_MAX_TRACE
7026         if (tr->max_buffer.buffer)
7027                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7028         tracing_reset_online_cpus(&tr->max_buffer);
7029 #endif
7030
7031         mutex_unlock(&trace_types_lock);
7032
7033         return 0;
7034 }
7035
7036 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7037                                    size_t cnt, loff_t *fpos)
7038 {
7039         struct seq_file *m = filp->private_data;
7040         struct trace_array *tr = m->private;
7041         char buf[64];
7042         const char *clockstr;
7043         int ret;
7044
7045         if (cnt >= sizeof(buf))
7046                 return -EINVAL;
7047
7048         if (copy_from_user(buf, ubuf, cnt))
7049                 return -EFAULT;
7050
7051         buf[cnt] = 0;
7052
7053         clockstr = strstrip(buf);
7054
7055         ret = tracing_set_clock(tr, clockstr);
7056         if (ret)
7057                 return ret;
7058
7059         *fpos += cnt;
7060
7061         return cnt;
7062 }
7063
7064 static int tracing_clock_open(struct inode *inode, struct file *file)
7065 {
7066         struct trace_array *tr = inode->i_private;
7067         int ret;
7068
7069         ret = tracing_check_open_get_tr(tr);
7070         if (ret)
7071                 return ret;
7072
7073         ret = single_open(file, tracing_clock_show, inode->i_private);
7074         if (ret < 0)
7075                 trace_array_put(tr);
7076
7077         return ret;
7078 }
7079
7080 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7081 {
7082         struct trace_array *tr = m->private;
7083
7084         mutex_lock(&trace_types_lock);
7085
7086         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7087                 seq_puts(m, "delta [absolute]\n");
7088         else
7089                 seq_puts(m, "[delta] absolute\n");
7090
7091         mutex_unlock(&trace_types_lock);
7092
7093         return 0;
7094 }
7095
7096 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7097 {
7098         struct trace_array *tr = inode->i_private;
7099         int ret;
7100
7101         ret = tracing_check_open_get_tr(tr);
7102         if (ret)
7103                 return ret;
7104
7105         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7106         if (ret < 0)
7107                 trace_array_put(tr);
7108
7109         return ret;
7110 }
7111
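/*
 * tracing_set_time_stamp_abs() switches the ring buffer between delta and
 * absolute timestamps. The switch is reference counted so that multiple
 * users (e.g. hist triggers) can request absolute timestamps without
 * stepping on each other.
 */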
7112 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
7113 {
7114         int ret = 0;
7115
7116         mutex_lock(&trace_types_lock);
7117
7118         if (abs && tr->time_stamp_abs_ref++)
7119                 goto out;
7120
7121         if (!abs) {
7122                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
7123                         ret = -EINVAL;
7124                         goto out;
7125                 }
7126
7127                 if (--tr->time_stamp_abs_ref)
7128                         goto out;
7129         }
7130
7131         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
7132
7133 #ifdef CONFIG_TRACER_MAX_TRACE
7134         if (tr->max_buffer.buffer)
7135                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
7136 #endif
7137  out:
7138         mutex_unlock(&trace_types_lock);
7139
7140         return ret;
7141 }
7142
7143 struct ftrace_buffer_info {
7144         struct trace_iterator   iter;
7145         void                    *spare;
7146         unsigned int            spare_cpu;
7147         unsigned int            read;
7148 };
7149
7150 #ifdef CONFIG_TRACER_SNAPSHOT
7151 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7152 {
7153         struct trace_array *tr = inode->i_private;
7154         struct trace_iterator *iter;
7155         struct seq_file *m;
7156         int ret;
7157
7158         ret = tracing_check_open_get_tr(tr);
7159         if (ret)
7160                 return ret;
7161
7162         if (file->f_mode & FMODE_READ) {
7163                 iter = __tracing_open(inode, file, true);
7164                 if (IS_ERR(iter))
7165                         ret = PTR_ERR(iter);
7166         } else {
7167                 /* Writes still need the seq_file to hold the private data */
7168                 ret = -ENOMEM;
7169                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7170                 if (!m)
7171                         goto out;
7172                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7173                 if (!iter) {
7174                         kfree(m);
7175                         goto out;
7176                 }
7177                 ret = 0;
7178
7179                 iter->tr = tr;
7180                 iter->array_buffer = &tr->max_buffer;
7181                 iter->cpu_file = tracing_get_cpu(inode);
7182                 m->private = iter;
7183                 file->private_data = m;
7184         }
7185 out:
7186         if (ret < 0)
7187                 trace_array_put(tr);
7188
7189         return ret;
7190 }
7191
7192 static void tracing_swap_cpu_buffer(void *tr)
7193 {
7194         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7195 }
7196
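/*
 * Values written to the "snapshot" file, as handled below: "0" frees the
 * snapshot buffer (all-CPU file only), "1" allocates it if needed and
 * swaps it with the live buffer, and any other value clears the snapshot
 * contents. A usage sketch, assuming tracefs at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 *   cat /sys/kernel/tracing/snapshot
 */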
7197 static ssize_t
7198 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7199                        loff_t *ppos)
7200 {
7201         struct seq_file *m = filp->private_data;
7202         struct trace_iterator *iter = m->private;
7203         struct trace_array *tr = iter->tr;
7204         unsigned long val;
7205         int ret;
7206
7207         ret = tracing_update_buffers();
7208         if (ret < 0)
7209                 return ret;
7210
7211         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7212         if (ret)
7213                 return ret;
7214
7215         mutex_lock(&trace_types_lock);
7216
7217         if (tr->current_trace->use_max_tr) {
7218                 ret = -EBUSY;
7219                 goto out;
7220         }
7221
7222         local_irq_disable();
7223         arch_spin_lock(&tr->max_lock);
7224         if (tr->cond_snapshot)
7225                 ret = -EBUSY;
7226         arch_spin_unlock(&tr->max_lock);
7227         local_irq_enable();
7228         if (ret)
7229                 goto out;
7230
7231         switch (val) {
7232         case 0:
7233                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7234                         ret = -EINVAL;
7235                         break;
7236                 }
7237                 if (tr->allocated_snapshot)
7238                         free_snapshot(tr);
7239                 break;
7240         case 1:
7241 /* Only allow per-cpu swap if the ring buffer supports it */
7242 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7243                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7244                         ret = -EINVAL;
7245                         break;
7246                 }
7247 #endif
7248                 if (tr->allocated_snapshot)
7249                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7250                                         &tr->array_buffer, iter->cpu_file);
7251                 else
7252                         ret = tracing_alloc_snapshot_instance(tr);
7253                 if (ret < 0)
7254                         break;
7255                 /* Now, we're going to swap */
7256                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7257                         local_irq_disable();
7258                         update_max_tr(tr, current, smp_processor_id(), NULL);
7259                         local_irq_enable();
7260                 } else {
7261                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7262                                                  (void *)tr, 1);
7263                 }
7264                 break;
7265         default:
7266                 if (tr->allocated_snapshot) {
7267                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7268                                 tracing_reset_online_cpus(&tr->max_buffer);
7269                         else
7270                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7271                 }
7272                 break;
7273         }
7274
7275         if (ret >= 0) {
7276                 *ppos += cnt;
7277                 ret = cnt;
7278         }
7279 out:
7280         mutex_unlock(&trace_types_lock);
7281         return ret;
7282 }
7283
7284 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7285 {
7286         struct seq_file *m = file->private_data;
7287         int ret;
7288
7289         ret = tracing_release(inode, file);
7290
7291         if (file->f_mode & FMODE_READ)
7292                 return ret;
7293
7294         /* If write only, the seq_file is just a stub */
7295         if (m)
7296                 kfree(m->private);
7297         kfree(m);
7298
7299         return 0;
7300 }
7301
7302 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7303 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7304                                     size_t count, loff_t *ppos);
7305 static int tracing_buffers_release(struct inode *inode, struct file *file);
7306 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7307                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7308
7309 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7310 {
7311         struct ftrace_buffer_info *info;
7312         int ret;
7313
7314         /* The following checks for tracefs lockdown */
7315         ret = tracing_buffers_open(inode, filp);
7316         if (ret < 0)
7317                 return ret;
7318
7319         info = filp->private_data;
7320
7321         if (info->iter.trace->use_max_tr) {
7322                 tracing_buffers_release(inode, filp);
7323                 return -EBUSY;
7324         }
7325
7326         info->iter.snapshot = true;
7327         info->iter.array_buffer = &info->iter.tr->max_buffer;
7328
7329         return ret;
7330 }
7331
7332 #endif /* CONFIG_TRACER_SNAPSHOT */
7333
7334
7335 static const struct file_operations tracing_thresh_fops = {
7336         .open           = tracing_open_generic,
7337         .read           = tracing_thresh_read,
7338         .write          = tracing_thresh_write,
7339         .llseek         = generic_file_llseek,
7340 };
7341
7342 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7343 static const struct file_operations tracing_max_lat_fops = {
7344         .open           = tracing_open_generic,
7345         .read           = tracing_max_lat_read,
7346         .write          = tracing_max_lat_write,
7347         .llseek         = generic_file_llseek,
7348 };
7349 #endif
7350
7351 static const struct file_operations set_tracer_fops = {
7352         .open           = tracing_open_generic_tr,
7353         .read           = tracing_set_trace_read,
7354         .write          = tracing_set_trace_write,
7355         .llseek         = generic_file_llseek,
7356         .release        = tracing_release_generic_tr,
7357 };
7358
7359 static const struct file_operations tracing_pipe_fops = {
7360         .open           = tracing_open_pipe,
7361         .poll           = tracing_poll_pipe,
7362         .read           = tracing_read_pipe,
7363         .splice_read    = tracing_splice_read_pipe,
7364         .release        = tracing_release_pipe,
7365         .llseek         = no_llseek,
7366 };
7367
7368 static const struct file_operations tracing_entries_fops = {
7369         .open           = tracing_open_generic_tr,
7370         .read           = tracing_entries_read,
7371         .write          = tracing_entries_write,
7372         .llseek         = generic_file_llseek,
7373         .release        = tracing_release_generic_tr,
7374 };
7375
7376 static const struct file_operations tracing_total_entries_fops = {
7377         .open           = tracing_open_generic_tr,
7378         .read           = tracing_total_entries_read,
7379         .llseek         = generic_file_llseek,
7380         .release        = tracing_release_generic_tr,
7381 };
7382
7383 static const struct file_operations tracing_free_buffer_fops = {
7384         .open           = tracing_open_generic_tr,
7385         .write          = tracing_free_buffer_write,
7386         .release        = tracing_free_buffer_release,
7387 };
7388
7389 static const struct file_operations tracing_mark_fops = {
7390         .open           = tracing_open_generic_tr,
7391         .write          = tracing_mark_write,
7392         .llseek         = generic_file_llseek,
7393         .release        = tracing_release_generic_tr,
7394 };
7395
7396 static const struct file_operations tracing_mark_raw_fops = {
7397         .open           = tracing_open_generic_tr,
7398         .write          = tracing_mark_raw_write,
7399         .llseek         = generic_file_llseek,
7400         .release        = tracing_release_generic_tr,
7401 };
7402
7403 static const struct file_operations trace_clock_fops = {
7404         .open           = tracing_clock_open,
7405         .read           = seq_read,
7406         .llseek         = seq_lseek,
7407         .release        = tracing_single_release_tr,
7408         .write          = tracing_clock_write,
7409 };
7410
7411 static const struct file_operations trace_time_stamp_mode_fops = {
7412         .open           = tracing_time_stamp_mode_open,
7413         .read           = seq_read,
7414         .llseek         = seq_lseek,
7415         .release        = tracing_single_release_tr,
7416 };
7417
7418 #ifdef CONFIG_TRACER_SNAPSHOT
7419 static const struct file_operations snapshot_fops = {
7420         .open           = tracing_snapshot_open,
7421         .read           = seq_read,
7422         .write          = tracing_snapshot_write,
7423         .llseek         = tracing_lseek,
7424         .release        = tracing_snapshot_release,
7425 };
7426
7427 static const struct file_operations snapshot_raw_fops = {
7428         .open           = snapshot_raw_open,
7429         .read           = tracing_buffers_read,
7430         .release        = tracing_buffers_release,
7431         .splice_read    = tracing_buffers_splice_read,
7432         .llseek         = no_llseek,
7433 };
7434
7435 #endif /* CONFIG_TRACER_SNAPSHOT */
7436
7437 #define TRACING_LOG_ERRS_MAX    8
7438 #define TRACING_LOG_LOC_MAX     128
7439
7440 #define CMD_PREFIX "  Command: "
7441
7442 struct err_info {
7443         const char      **errs; /* ptr to loc-specific array of err strings */
7444         u8              type;   /* index into errs -> specific err string */
7445         u8              pos;    /* caret position in cmd (cmd is MAX_FILTER_STR_VAL = 256 bytes) */
7446         u64             ts;
7447 };
7448
7449 struct tracing_log_err {
7450         struct list_head        list;
7451         struct err_info         info;
7452         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7453         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7454 };
7455
7456 static DEFINE_MUTEX(tracing_err_log_lock);
7457
7458 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7459 {
7460         struct tracing_log_err *err;
7461
7462         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7463                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7464                 if (!err)
7465                         err = ERR_PTR(-ENOMEM);
7466                 else
7467                         tr->n_err_log_entries++;
7468
7469                 return err;
7470         }
7471
7472         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7473         list_del(&err->list);
7474
7475         return err;
7476 }
7477
7478 /**
7479  * err_pos - find the position of a string within a command for error careting
7480  * @cmd: The tracing command that caused the error
7481  * @str: The string to position the caret at within @cmd
7482  *
7483  * Finds the position of the first occurrence of @str within @cmd.  The
7484  * return value can be passed to tracing_log_err() for caret placement
7485  * within @cmd.
7486  *
7487  * Returns the index within @cmd of the first occurrence of @str or 0
7488  * if @str was not found.
7489  */
7490 unsigned int err_pos(char *cmd, const char *str)
7491 {
7492         char *found;
7493
7494         if (WARN_ON(!strlen(cmd)))
7495                 return 0;
7496
7497         found = strstr(cmd, str);
7498         if (found)
7499                 return found - cmd;
7500
7501         return 0;
7502 }
7503
7504 /**
7505  * tracing_log_err - write an error to the tracing error log
7506  * @tr: The associated trace array for the error (NULL for top level array)
7507  * @loc: A string describing where the error occurred
7508  * @cmd: The tracing command that caused the error
7509  * @errs: The array of loc-specific static error strings
7510  * @type: The index into errs[], which produces the specific static err string
7511  * @pos: The position the caret should be placed in the cmd
7512  *
7513  * Writes an error into tracing/error_log of the form:
7514  *
7515  * <loc>: error: <text>
7516  *   Command: <cmd>
7517  *              ^
7518  *
7519  * tracing/error_log is a small log file containing the last
7520  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7521  * unless there has been a tracing error, and the error log can be
7522  * cleared and have its memory freed by writing the empty string in
7523  * truncation mode to it, i.e. echo > tracing/error_log.
7524  *
7525  * NOTE: the @errs array along with the @type param are used to
7526  * produce a static error string - this string is not copied and saved
7527  * when the error is logged - only a pointer to it is saved.  See
7528  * existing callers for examples of how static strings are typically
7529  * defined for use with tracing_log_err().
7530  */
7531 void tracing_log_err(struct trace_array *tr,
7532                      const char *loc, const char *cmd,
7533                      const char **errs, u8 type, u8 pos)
7534 {
7535         struct tracing_log_err *err;
7536
7537         if (!tr)
7538                 tr = &global_trace;
7539
7540         mutex_lock(&tracing_err_log_lock);
7541         err = get_tracing_log_err(tr);
7542         if (PTR_ERR(err) == -ENOMEM) {
7543                 mutex_unlock(&tracing_err_log_lock);
7544                 return;
7545         }
7546
7547         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7548         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7549
7550         err->info.errs = errs;
7551         err->info.type = type;
7552         err->info.pos = pos;
7553         err->info.ts = local_clock();
7554
7555         list_add_tail(&err->list, &tr->err_log);
7556         mutex_unlock(&tracing_err_log_lock);
7557 }
7558
7559 static void clear_tracing_err_log(struct trace_array *tr)
7560 {
7561         struct tracing_log_err *err, *next;
7562
7563         mutex_lock(&tracing_err_log_lock);
7564         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7565                 list_del(&err->list);
7566                 kfree(err);
7567         }
7568
7569         tr->n_err_log_entries = 0;
7570         mutex_unlock(&tracing_err_log_lock);
7571 }
7572
7573 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7574 {
7575         struct trace_array *tr = m->private;
7576
7577         mutex_lock(&tracing_err_log_lock);
7578
7579         return seq_list_start(&tr->err_log, *pos);
7580 }
7581
7582 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7583 {
7584         struct trace_array *tr = m->private;
7585
7586         return seq_list_next(v, &tr->err_log, pos);
7587 }
7588
7589 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7590 {
7591         mutex_unlock(&tracing_err_log_lock);
7592 }
7593
7594 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7595 {
7596         u8 i;
7597
7598         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7599                 seq_putc(m, ' ');
7600         for (i = 0; i < pos; i++)
7601                 seq_putc(m, ' ');
7602         seq_puts(m, "^\n");
7603 }
7604
7605 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7606 {
7607         struct tracing_log_err *err = v;
7608
7609         if (err) {
7610                 const char *err_text = err->info.errs[err->info.type];
7611                 u64 sec = err->info.ts;
7612                 u32 nsec;
7613
7614                 nsec = do_div(sec, NSEC_PER_SEC);
7615                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7616                            err->loc, err_text);
7617                 seq_printf(m, "%s", err->cmd);
7618                 tracing_err_log_show_pos(m, err->info.pos);
7619         }
7620
7621         return 0;
7622 }
7623
7624 static const struct seq_operations tracing_err_log_seq_ops = {
7625         .start  = tracing_err_log_seq_start,
7626         .next   = tracing_err_log_seq_next,
7627         .stop   = tracing_err_log_seq_stop,
7628         .show   = tracing_err_log_seq_show
7629 };
7630
7631 static int tracing_err_log_open(struct inode *inode, struct file *file)
7632 {
7633         struct trace_array *tr = inode->i_private;
7634         int ret = 0;
7635
7636         ret = tracing_check_open_get_tr(tr);
7637         if (ret)
7638                 return ret;
7639
7640         /* If this file was opened for write, then erase contents */
7641         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7642                 clear_tracing_err_log(tr);
7643
7644         if (file->f_mode & FMODE_READ) {
7645                 ret = seq_open(file, &tracing_err_log_seq_ops);
7646                 if (!ret) {
7647                         struct seq_file *m = file->private_data;
7648                         m->private = tr;
7649                 } else {
7650                         trace_array_put(tr);
7651                 }
7652         }
7653         return ret;
7654 }
7655
7656 static ssize_t tracing_err_log_write(struct file *file,
7657                                      const char __user *buffer,
7658                                      size_t count, loff_t *ppos)
7659 {
7660         return count;
7661 }
7662
7663 static int tracing_err_log_release(struct inode *inode, struct file *file)
7664 {
7665         struct trace_array *tr = inode->i_private;
7666
7667         trace_array_put(tr);
7668
7669         if (file->f_mode & FMODE_READ)
7670                 seq_release(inode, file);
7671
7672         return 0;
7673 }
7674
7675 static const struct file_operations tracing_err_log_fops = {
7676         .open           = tracing_err_log_open,
7677         .write          = tracing_err_log_write,
7678         .read           = seq_read,
7679         .llseek         = tracing_lseek,
7680         .release        = tracing_err_log_release,
7681 };
7682
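/*
 * The tracing_buffers_* handlers below back the per-CPU "trace_pipe_raw"
 * files, which hand whole ring-buffer pages to user space (typically via
 * splice()) instead of formatted text.
 */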
7683 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7684 {
7685         struct trace_array *tr = inode->i_private;
7686         struct ftrace_buffer_info *info;
7687         int ret;
7688
7689         ret = tracing_check_open_get_tr(tr);
7690         if (ret)
7691                 return ret;
7692
7693         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7694         if (!info) {
7695                 trace_array_put(tr);
7696                 return -ENOMEM;
7697         }
7698
7699         mutex_lock(&trace_types_lock);
7700
7701         info->iter.tr           = tr;
7702         info->iter.cpu_file     = tracing_get_cpu(inode);
7703         info->iter.trace        = tr->current_trace;
7704         info->iter.array_buffer = &tr->array_buffer;
7705         info->spare             = NULL;
7706         /* Force reading ring buffer for first read */
7707         info->read              = (unsigned int)-1;
7708
7709         filp->private_data = info;
7710
7711         tr->trace_ref++;
7712
7713         mutex_unlock(&trace_types_lock);
7714
7715         ret = nonseekable_open(inode, filp);
7716         if (ret < 0)
7717                 trace_array_put(tr);
7718
7719         return ret;
7720 }
7721
7722 static __poll_t
7723 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7724 {
7725         struct ftrace_buffer_info *info = filp->private_data;
7726         struct trace_iterator *iter = &info->iter;
7727
7728         return trace_poll(iter, filp, poll_table);
7729 }
7730
7731 static ssize_t
7732 tracing_buffers_read(struct file *filp, char __user *ubuf,
7733                      size_t count, loff_t *ppos)
7734 {
7735         struct ftrace_buffer_info *info = filp->private_data;
7736         struct trace_iterator *iter = &info->iter;
7737         ssize_t ret = 0;
7738         ssize_t size;
7739
7740         if (!count)
7741                 return 0;
7742
7743 #ifdef CONFIG_TRACER_MAX_TRACE
7744         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7745                 return -EBUSY;
7746 #endif
7747
7748         if (!info->spare) {
7749                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7750                                                           iter->cpu_file);
7751                 if (IS_ERR(info->spare)) {
7752                         ret = PTR_ERR(info->spare);
7753                         info->spare = NULL;
7754                 } else {
7755                         info->spare_cpu = iter->cpu_file;
7756                 }
7757         }
7758         if (!info->spare)
7759                 return ret;
7760
7761         /* Do we have previous read data to read? */
7762         if (info->read < PAGE_SIZE)
7763                 goto read;
7764
7765  again:
7766         trace_access_lock(iter->cpu_file);
7767         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7768                                     &info->spare,
7769                                     count,
7770                                     iter->cpu_file, 0);
7771         trace_access_unlock(iter->cpu_file);
7772
7773         if (ret < 0) {
7774                 if (trace_empty(iter)) {
7775                         if ((filp->f_flags & O_NONBLOCK))
7776                                 return -EAGAIN;
7777
7778                         ret = wait_on_pipe(iter, 0);
7779                         if (ret)
7780                                 return ret;
7781
7782                         goto again;
7783                 }
7784                 return 0;
7785         }
7786
7787         info->read = 0;
7788  read:
7789         size = PAGE_SIZE - info->read;
7790         if (size > count)
7791                 size = count;
7792
7793         ret = copy_to_user(ubuf, info->spare + info->read, size);
7794         if (ret == size)
7795                 return -EFAULT;
7796
7797         size -= ret;
7798
7799         *ppos += size;
7800         info->read += size;
7801
7802         return size;
7803 }
7804
7805 static int tracing_buffers_release(struct inode *inode, struct file *file)
7806 {
7807         struct ftrace_buffer_info *info = file->private_data;
7808         struct trace_iterator *iter = &info->iter;
7809
7810         mutex_lock(&trace_types_lock);
7811
7812         iter->tr->trace_ref--;
7813
7814         __trace_array_put(iter->tr);
7815
7816         if (info->spare)
7817                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7818                                            info->spare_cpu, info->spare);
7819         kvfree(info);
7820
7821         mutex_unlock(&trace_types_lock);
7822
7823         return 0;
7824 }
7825
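/*
 * A buffer_ref is a reference-counted wrapper around one ring-buffer read
 * page handed to a pipe via splice(); the page is returned to the ring
 * buffer only when the last reference is dropped.
 */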
7826 struct buffer_ref {
7827         struct trace_buffer     *buffer;
7828         void                    *page;
7829         int                     cpu;
7830         refcount_t              refcount;
7831 };
7832
7833 static void buffer_ref_release(struct buffer_ref *ref)
7834 {
7835         if (!refcount_dec_and_test(&ref->refcount))
7836                 return;
7837         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7838         kfree(ref);
7839 }
7840
7841 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7842                                     struct pipe_buffer *buf)
7843 {
7844         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7845
7846         buffer_ref_release(ref);
7847         buf->private = 0;
7848 }
7849
7850 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7851                                 struct pipe_buffer *buf)
7852 {
7853         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7854
7855         if (refcount_read(&ref->refcount) > INT_MAX/2)
7856                 return false;
7857
7858         refcount_inc(&ref->refcount);
7859         return true;
7860 }
7861
7862 /* Pipe buffer operations for a buffer. */
7863 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7864         .release                = buffer_pipe_buf_release,
7865         .get                    = buffer_pipe_buf_get,
7866 };
7867
7868 /*
7869  * Callback from splice_to_pipe(), used to release the remaining pages
7870  * at the end of the spd in case we errored out while filling the pipe.
7871  */
7872 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7873 {
7874         struct buffer_ref *ref =
7875                 (struct buffer_ref *)spd->partial[i].private;
7876
7877         buffer_ref_release(ref);
7878         spd->partial[i].private = 0;
7879 }
7880
7881 static ssize_t
7882 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7883                             struct pipe_inode_info *pipe, size_t len,
7884                             unsigned int flags)
7885 {
7886         struct ftrace_buffer_info *info = file->private_data;
7887         struct trace_iterator *iter = &info->iter;
7888         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7889         struct page *pages_def[PIPE_DEF_BUFFERS];
7890         struct splice_pipe_desc spd = {
7891                 .pages          = pages_def,
7892                 .partial        = partial_def,
7893                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7894                 .ops            = &buffer_pipe_buf_ops,
7895                 .spd_release    = buffer_spd_release,
7896         };
7897         struct buffer_ref *ref;
7898         int entries, i;
7899         ssize_t ret = 0;
7900
7901 #ifdef CONFIG_TRACER_MAX_TRACE
7902         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7903                 return -EBUSY;
7904 #endif
7905
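        /*
         * The splice offset must be page aligned, and the requested
         * length must cover at least one full page (it is rounded
         * down to a multiple of the page size just below).
         */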
7906         if (*ppos & (PAGE_SIZE - 1))
7907                 return -EINVAL;
7908
7909         if (len & (PAGE_SIZE - 1)) {
7910                 if (len < PAGE_SIZE)
7911                         return -EINVAL;
7912                 len &= PAGE_MASK;
7913         }
7914
7915         if (splice_grow_spd(pipe, &spd))
7916                 return -ENOMEM;
7917
7918  again:
7919         trace_access_lock(iter->cpu_file);
7920         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7921
7922         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7923                 struct page *page;
7924                 int r;
7925
7926                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7927                 if (!ref) {
7928                         ret = -ENOMEM;
7929                         break;
7930                 }
7931
7932                 refcount_set(&ref->refcount, 1);
7933                 ref->buffer = iter->array_buffer->buffer;
7934                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7935                 if (IS_ERR(ref->page)) {
7936                         ret = PTR_ERR(ref->page);
7937                         ref->page = NULL;
7938                         kfree(ref);
7939                         break;
7940                 }
7941                 ref->cpu = iter->cpu_file;
7942
7943                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7944                                           len, iter->cpu_file, 1);
7945                 if (r < 0) {
7946                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7947                                                    ref->page);
7948                         kfree(ref);
7949                         break;
7950                 }
7951
7952                 page = virt_to_page(ref->page);
7953
7954                 spd.pages[i] = page;
7955                 spd.partial[i].len = PAGE_SIZE;
7956                 spd.partial[i].offset = 0;
7957                 spd.partial[i].private = (unsigned long)ref;
7958                 spd.nr_pages++;
7959                 *ppos += PAGE_SIZE;
7960
7961                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7962         }
7963
7964         trace_access_unlock(iter->cpu_file);
7965         spd.nr_pages = i;
7966
7967         /* did we read anything? */
7968         if (!spd.nr_pages) {
7969                 if (ret)
7970                         goto out;
7971
7972                 ret = -EAGAIN;
7973                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7974                         goto out;
7975
7976                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
7977                 if (ret)
7978                         goto out;
7979
7980                 goto again;
7981         }
7982
7983         ret = splice_to_pipe(pipe, &spd);
7984 out:
7985         splice_shrink_spd(&spd);
7986
7987         return ret;
7988 }
7989
7990 static const struct file_operations tracing_buffers_fops = {
7991         .open           = tracing_buffers_open,
7992         .read           = tracing_buffers_read,
7993         .poll           = tracing_buffers_poll,
7994         .release        = tracing_buffers_release,
7995         .splice_read    = tracing_buffers_splice_read,
7996         .llseek         = no_llseek,
7997 };
7998
7999 static ssize_t
8000 tracing_stats_read(struct file *filp, char __user *ubuf,
8001                    size_t count, loff_t *ppos)
8002 {
8003         struct inode *inode = file_inode(filp);
8004         struct trace_array *tr = inode->i_private;
8005         struct array_buffer *trace_buf = &tr->array_buffer;
8006         int cpu = tracing_get_cpu(inode);
8007         struct trace_seq *s;
8008         unsigned long cnt;
8009         unsigned long long t;
8010         unsigned long usec_rem;
8011
8012         s = kmalloc(sizeof(*s), GFP_KERNEL);
8013         if (!s)
8014                 return -ENOMEM;
8015
8016         trace_seq_init(s);
8017
8018         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8019         trace_seq_printf(s, "entries: %ld\n", cnt);
8020
8021         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8022         trace_seq_printf(s, "overrun: %ld\n", cnt);
8023
8024         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8025         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8026
8027         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8028         trace_seq_printf(s, "bytes: %ld\n", cnt);
8029
8030         if (trace_clocks[tr->clock_id].in_ns) {
8031                 /* local or global for trace_clock */
8032                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8033                 usec_rem = do_div(t, USEC_PER_SEC);
8034                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8035                                                                 t, usec_rem);
8036
8037                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
8038                 usec_rem = do_div(t, USEC_PER_SEC);
8039                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8040         } else {
8041                 /* counter or tsc mode for trace_clock */
8042                 trace_seq_printf(s, "oldest event ts: %llu\n",
8043                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8044
8045                 trace_seq_printf(s, "now ts: %llu\n",
8046                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
8047         }
8048
8049         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8050         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8051
8052         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8053         trace_seq_printf(s, "read events: %ld\n", cnt);
8054
8055         count = simple_read_from_buffer(ubuf, count, ppos,
8056                                         s->buffer, trace_seq_used(s));
8057
8058         kfree(s);
8059
8060         return count;
8061 }
8062
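/*
 * Example of what a per-cpu "stats" file generated above might look
 * like (all values are made up for illustration):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53248
 *   oldest event ts: 12345.678901
 *   now ts: 12350.000123
 *   dropped events: 0
 *   read events: 256
 */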
8063 static const struct file_operations tracing_stats_fops = {
8064         .open           = tracing_open_generic_tr,
8065         .read           = tracing_stats_read,
8066         .llseek         = generic_file_llseek,
8067         .release        = tracing_release_generic_tr,
8068 };
8069
8070 #ifdef CONFIG_DYNAMIC_FTRACE
8071
8072 static ssize_t
8073 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8074                   size_t cnt, loff_t *ppos)
8075 {
8076         ssize_t ret;
8077         char *buf;
8078         int r;
8079
8080         /* 256 should be plenty to hold the amount needed */
8081         buf = kmalloc(256, GFP_KERNEL);
8082         if (!buf)
8083                 return -ENOMEM;
8084
8085         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8086                       ftrace_update_tot_cnt,
8087                       ftrace_number_of_pages,
8088                       ftrace_number_of_groups);
8089
8090         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8091         kfree(buf);
8092         return ret;
8093 }
8094
8095 static const struct file_operations tracing_dyn_info_fops = {
8096         .open           = tracing_open_generic,
8097         .read           = tracing_read_dyn_info,
8098         .llseek         = generic_file_llseek,
8099 };
8100 #endif /* CONFIG_DYNAMIC_FTRACE */
8101
8102 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8103 static void
8104 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8105                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8106                 void *data)
8107 {
8108         tracing_snapshot_instance(tr);
8109 }
8110
8111 static void
8112 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8113                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8114                       void *data)
8115 {
8116         struct ftrace_func_mapper *mapper = data;
8117         long *count = NULL;
8118
8119         if (mapper)
8120                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8121
8122         if (count) {
8123
8124                 if (*count <= 0)
8125                         return;
8126
8127                 (*count)--;
8128         }
8129
8130         tracing_snapshot_instance(tr);
8131 }
8132
8133 static int
8134 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8135                       struct ftrace_probe_ops *ops, void *data)
8136 {
8137         struct ftrace_func_mapper *mapper = data;
8138         long *count = NULL;
8139
8140         seq_printf(m, "%ps:", (void *)ip);
8141
8142         seq_puts(m, "snapshot");
8143
8144         if (mapper)
8145                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8146
8147         if (count)
8148                 seq_printf(m, ":count=%ld\n", *count);
8149         else
8150                 seq_puts(m, ":unlimited\n");
8151
8152         return 0;
8153 }
8154
8155 static int
8156 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8157                      unsigned long ip, void *init_data, void **data)
8158 {
8159         struct ftrace_func_mapper *mapper = *data;
8160
8161         if (!mapper) {
8162                 mapper = allocate_ftrace_func_mapper();
8163                 if (!mapper)
8164                         return -ENOMEM;
8165                 *data = mapper;
8166         }
8167
8168         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8169 }
8170
8171 static void
8172 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8173                      unsigned long ip, void *data)
8174 {
8175         struct ftrace_func_mapper *mapper = data;
8176
8177         if (!ip) {
8178                 if (!mapper)
8179                         return;
8180                 free_ftrace_func_mapper(mapper, NULL);
8181                 return;
8182         }
8183
8184         ftrace_func_mapper_remove_ip(mapper, ip);
8185 }
8186
8187 static struct ftrace_probe_ops snapshot_probe_ops = {
8188         .func                   = ftrace_snapshot,
8189         .print                  = ftrace_snapshot_print,
8190 };
8191
8192 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8193         .func                   = ftrace_count_snapshot,
8194         .print                  = ftrace_snapshot_print,
8195         .init                   = ftrace_snapshot_init,
8196         .free                   = ftrace_snapshot_free,
8197 };
8198
8199 static int
8200 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8201                                char *glob, char *cmd, char *param, int enable)
8202 {
8203         struct ftrace_probe_ops *ops;
8204         void *count = (void *)-1;
8205         char *number;
8206         int ret;
8207
8208         if (!tr)
8209                 return -ENODEV;
8210
8211         /* hash funcs only work with set_ftrace_filter */
8212         if (!enable)
8213                 return -EINVAL;
8214
8215         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8216
8217         if (glob[0] == '!')
8218                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8219
8220         if (!param)
8221                 goto out_reg;
8222
8223         number = strsep(&param, ":");
8224
8225         if (!strlen(number))
8226                 goto out_reg;
8227
8228         /*
8229          * We use the callback data field (which is a pointer)
8230          * as our counter.
8231          */
8232         ret = kstrtoul(number, 0, (unsigned long *)&count);
8233         if (ret)
8234                 return ret;
8235
8236  out_reg:
8237         ret = tracing_alloc_snapshot_instance(tr);
8238         if (ret < 0)
8239                 goto out;
8240
8241         ret = register_ftrace_function_probe(glob, tr, ops, count);
8242
8243  out:
8244         return ret < 0 ? ret : 0;
8245 }
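/*
 * Usage sketch for the "snapshot" command parsed above, written to
 * set_ftrace_filter (the function name is only illustrative):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter    (snapshot on every hit)
 *   echo 'schedule:snapshot:3' > set_ftrace_filter  (at most 3 snapshots)
 *   echo '!schedule:snapshot' > set_ftrace_filter   (remove the probe)
 */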
8246
8247 static struct ftrace_func_command ftrace_snapshot_cmd = {
8248         .name                   = "snapshot",
8249         .func                   = ftrace_trace_snapshot_callback,
8250 };
8251
8252 static __init int register_snapshot_cmd(void)
8253 {
8254         return register_ftrace_command(&ftrace_snapshot_cmd);
8255 }
8256 #else
8257 static inline __init int register_snapshot_cmd(void) { return 0; }
8258 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8259
8260 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8261 {
8262         if (WARN_ON(!tr->dir))
8263                 return ERR_PTR(-ENODEV);
8264
8265         /* Top directory uses NULL as the parent */
8266         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8267                 return NULL;
8268
8269         /* All sub buffers have a descriptor */
8270         return tr->dir;
8271 }
8272
8273 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8274 {
8275         struct dentry *d_tracer;
8276
8277         if (tr->percpu_dir)
8278                 return tr->percpu_dir;
8279
8280         d_tracer = tracing_get_dentry(tr);
8281         if (IS_ERR(d_tracer))
8282                 return NULL;
8283
8284         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8285
8286         MEM_FAIL(!tr->percpu_dir,
8287                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8288
8289         return tr->percpu_dir;
8290 }
8291
8292 static struct dentry *
8293 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8294                       void *data, long cpu, const struct file_operations *fops)
8295 {
8296         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8297
8298         if (ret) /* See tracing_get_cpu() */
8299                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8300         return ret;
8301 }
8302
8303 static void
8304 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8305 {
8306         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8307         struct dentry *d_cpu;
8308         char cpu_dir[30]; /* 30 characters should be more than enough */
8309
8310         if (!d_percpu)
8311                 return;
8312
8313         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8314         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8315         if (!d_cpu) {
8316                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8317                 return;
8318         }
8319
8320         /* per cpu trace_pipe */
8321         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8322                                 tr, cpu, &tracing_pipe_fops);
8323
8324         /* per cpu trace */
8325         trace_create_cpu_file("trace", 0644, d_cpu,
8326                                 tr, cpu, &tracing_fops);
8327
8328         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8329                                 tr, cpu, &tracing_buffers_fops);
8330
8331         trace_create_cpu_file("stats", 0444, d_cpu,
8332                                 tr, cpu, &tracing_stats_fops);
8333
8334         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8335                                 tr, cpu, &tracing_entries_fops);
8336
8337 #ifdef CONFIG_TRACER_SNAPSHOT
8338         trace_create_cpu_file("snapshot", 0644, d_cpu,
8339                                 tr, cpu, &snapshot_fops);
8340
8341         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8342                                 tr, cpu, &snapshot_raw_fops);
8343 #endif
8344 }
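/*
 * The calls above produce, per trace instance, a tracefs layout along
 * the lines of:
 *
 *   per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,stats,buffer_size_kb}
 *   per_cpu/cpu0/{snapshot,snapshot_raw}    (CONFIG_TRACER_SNAPSHOT only)
 *   per_cpu/cpu1/...
 */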
8345
8346 #ifdef CONFIG_FTRACE_SELFTEST
8347 /* Let selftest have access to static functions in this file */
8348 #include "trace_selftest.c"
8349 #endif
8350
8351 static ssize_t
8352 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8353                         loff_t *ppos)
8354 {
8355         struct trace_option_dentry *topt = filp->private_data;
8356         char *buf;
8357
8358         if (topt->flags->val & topt->opt->bit)
8359                 buf = "1\n";
8360         else
8361                 buf = "0\n";
8362
8363         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8364 }
8365
8366 static ssize_t
8367 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8368                          loff_t *ppos)
8369 {
8370         struct trace_option_dentry *topt = filp->private_data;
8371         unsigned long val;
8372         int ret;
8373
8374         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8375         if (ret)
8376                 return ret;
8377
8378         if (val != 0 && val != 1)
8379                 return -EINVAL;
8380
8381         if (!!(topt->flags->val & topt->opt->bit) != val) {
8382                 mutex_lock(&trace_types_lock);
8383                 ret = __set_tracer_option(topt->tr, topt->flags,
8384                                           topt->opt, !val);
8385                 mutex_unlock(&trace_types_lock);
8386                 if (ret)
8387                         return ret;
8388         }
8389
8390         *ppos += cnt;
8391
8392         return cnt;
8393 }
8394
8395 static int tracing_open_options(struct inode *inode, struct file *filp)
8396 {
8397         struct trace_option_dentry *topt = inode->i_private;
8398         int ret;
8399
8400         ret = tracing_check_open_get_tr(topt->tr);
8401         if (ret)
8402                 return ret;
8403
8404         filp->private_data = inode->i_private;
8405         return 0;
8406 }
8407
8408 static int tracing_release_options(struct inode *inode, struct file *file)
8409 {
8410         struct trace_option_dentry *topt = file->private_data;
8411
8412         trace_array_put(topt->tr);
8413         return 0;
8414 }
8415
8416 static const struct file_operations trace_options_fops = {
8417         .open = tracing_open_options,
8418         .read = trace_options_read,
8419         .write = trace_options_write,
8420         .llseek = generic_file_llseek,
8421         .release = tracing_release_options,
8422 };
8423
8424 /*
8425  * In order to pass in both the trace_array descriptor as well as the index
8426  * to the flag that the trace option file represents, the trace_array
8427  * has a character array of trace_flags_index[], which holds the index
8428  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8429  * The address of this character array is passed to the flag option file
8430  * read/write callbacks.
8431  *
8432  * In order to extract both the index and the trace_array descriptor,
8433  * get_tr_index() uses the following algorithm.
8434  *
8435  *   idx = *ptr;
8436  *
8437  * Since the value stored at that pointer is the index's own offset
8438  * into the array (remember index[1] == 1),
8439  *
8440  * subtracting that index from the pointer gets us back to the start
8441  * of the index array itself:
8442  *
8443  *   ptr - idx == &index[0]
8444  *
8445  * Then a simple container_of() from that pointer gets us to the
8446  * trace_array descriptor.
8447  */
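/*
 * Worked example (the addresses are hypothetical, for illustration
 * only): if tr->trace_flags_index[] starts at 0x1000, the option file
 * for flag 3 gets data == 0x1003 and *data == 3, so data - *data ==
 * 0x1000 == &tr->trace_flags_index[0], and container_of() then
 * recovers 'tr'.
 */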
8448 static void get_tr_index(void *data, struct trace_array **ptr,
8449                          unsigned int *pindex)
8450 {
8451         *pindex = *(unsigned char *)data;
8452
8453         *ptr = container_of(data - *pindex, struct trace_array,
8454                             trace_flags_index);
8455 }
8456
8457 static ssize_t
8458 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8459                         loff_t *ppos)
8460 {
8461         void *tr_index = filp->private_data;
8462         struct trace_array *tr;
8463         unsigned int index;
8464         char *buf;
8465
8466         get_tr_index(tr_index, &tr, &index);
8467
8468         if (tr->trace_flags & (1 << index))
8469                 buf = "1\n";
8470         else
8471                 buf = "0\n";
8472
8473         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8474 }
8475
8476 static ssize_t
8477 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8478                          loff_t *ppos)
8479 {
8480         void *tr_index = filp->private_data;
8481         struct trace_array *tr;
8482         unsigned int index;
8483         unsigned long val;
8484         int ret;
8485
8486         get_tr_index(tr_index, &tr, &index);
8487
8488         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8489         if (ret)
8490                 return ret;
8491
8492         if (val != 0 && val != 1)
8493                 return -EINVAL;
8494
8495         mutex_lock(&event_mutex);
8496         mutex_lock(&trace_types_lock);
8497         ret = set_tracer_flag(tr, 1 << index, val);
8498         mutex_unlock(&trace_types_lock);
8499         mutex_unlock(&event_mutex);
8500
8501         if (ret < 0)
8502                 return ret;
8503
8504         *ppos += cnt;
8505
8506         return cnt;
8507 }
8508
8509 static const struct file_operations trace_options_core_fops = {
8510         .open = tracing_open_generic,
8511         .read = trace_options_core_read,
8512         .write = trace_options_core_write,
8513         .llseek = generic_file_llseek,
8514 };
8515
8516 struct dentry *trace_create_file(const char *name,
8517                                  umode_t mode,
8518                                  struct dentry *parent,
8519                                  void *data,
8520                                  const struct file_operations *fops)
8521 {
8522         struct dentry *ret;
8523
8524         ret = tracefs_create_file(name, mode, parent, data, fops);
8525         if (!ret)
8526                 pr_warn("Could not create tracefs '%s' entry\n", name);
8527
8528         return ret;
8529 }
8530
8531
8532 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8533 {
8534         struct dentry *d_tracer;
8535
8536         if (tr->options)
8537                 return tr->options;
8538
8539         d_tracer = tracing_get_dentry(tr);
8540         if (IS_ERR(d_tracer))
8541                 return NULL;
8542
8543         tr->options = tracefs_create_dir("options", d_tracer);
8544         if (!tr->options) {
8545                 pr_warn("Could not create tracefs directory 'options'\n");
8546                 return NULL;
8547         }
8548
8549         return tr->options;
8550 }
8551
8552 static void
8553 create_trace_option_file(struct trace_array *tr,
8554                          struct trace_option_dentry *topt,
8555                          struct tracer_flags *flags,
8556                          struct tracer_opt *opt)
8557 {
8558         struct dentry *t_options;
8559
8560         t_options = trace_options_init_dentry(tr);
8561         if (!t_options)
8562                 return;
8563
8564         topt->flags = flags;
8565         topt->opt = opt;
8566         topt->tr = tr;
8567
8568         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8569                                     &trace_options_fops);
8571 }
8572
8573 static void
8574 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8575 {
8576         struct trace_option_dentry *topts;
8577         struct trace_options *tr_topts;
8578         struct tracer_flags *flags;
8579         struct tracer_opt *opts;
8580         int cnt;
8581         int i;
8582
8583         if (!tracer)
8584                 return;
8585
8586         flags = tracer->flags;
8587
8588         if (!flags || !flags->opts)
8589                 return;
8590
8591         /*
8592          * If this is an instance, only create flags for tracers
8593          * the instance may have.
8594          */
8595         if (!trace_ok_for_array(tracer, tr))
8596                 return;
8597
8598         for (i = 0; i < tr->nr_topts; i++) {
8599                 /* Make sure there are no duplicate flags. */
8600                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8601                         return;
8602         }
8603
8604         opts = flags->opts;
8605
8606         for (cnt = 0; opts[cnt].name; cnt++)
8607                 ;
8608
8609         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8610         if (!topts)
8611                 return;
8612
8613         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8614                             GFP_KERNEL);
8615         if (!tr_topts) {
8616                 kfree(topts);
8617                 return;
8618         }
8619
8620         tr->topts = tr_topts;
8621         tr->topts[tr->nr_topts].tracer = tracer;
8622         tr->topts[tr->nr_topts].topts = topts;
8623         tr->nr_topts++;
8624
8625         for (cnt = 0; opts[cnt].name; cnt++) {
8626                 create_trace_option_file(tr, &topts[cnt], flags,
8627                                          &opts[cnt]);
8628                 MEM_FAIL(topts[cnt].entry == NULL,
8629                           "Failed to create trace option: %s",
8630                           opts[cnt].name);
8631         }
8632 }
8633
8634 static struct dentry *
8635 create_trace_option_core_file(struct trace_array *tr,
8636                               const char *option, long index)
8637 {
8638         struct dentry *t_options;
8639
8640         t_options = trace_options_init_dentry(tr);
8641         if (!t_options)
8642                 return NULL;
8643
8644         return trace_create_file(option, 0644, t_options,
8645                                  (void *)&tr->trace_flags_index[index],
8646                                  &trace_options_core_fops);
8647 }
8648
8649 static void create_trace_options_dir(struct trace_array *tr)
8650 {
8651         struct dentry *t_options;
8652         bool top_level = tr == &global_trace;
8653         int i;
8654
8655         t_options = trace_options_init_dentry(tr);
8656         if (!t_options)
8657                 return;
8658
8659         for (i = 0; trace_options[i]; i++) {
8660                 if (top_level ||
8661                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8662                         create_trace_option_core_file(tr, trace_options[i], i);
8663         }
8664 }
8665
8666 static ssize_t
8667 rb_simple_read(struct file *filp, char __user *ubuf,
8668                size_t cnt, loff_t *ppos)
8669 {
8670         struct trace_array *tr = filp->private_data;
8671         char buf[64];
8672         int r;
8673
8674         r = tracer_tracing_is_on(tr);
8675         r = sprintf(buf, "%d\n", r);
8676
8677         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8678 }
8679
8680 static ssize_t
8681 rb_simple_write(struct file *filp, const char __user *ubuf,
8682                 size_t cnt, loff_t *ppos)
8683 {
8684         struct trace_array *tr = filp->private_data;
8685         struct trace_buffer *buffer = tr->array_buffer.buffer;
8686         unsigned long val;
8687         int ret;
8688
8689         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8690         if (ret)
8691                 return ret;
8692
8693         if (buffer) {
8694                 mutex_lock(&trace_types_lock);
8695                 if (!!val == tracer_tracing_is_on(tr)) {
8696                         val = 0; /* do nothing */
8697                 } else if (val) {
8698                         tracer_tracing_on(tr);
8699                         if (tr->current_trace->start)
8700                                 tr->current_trace->start(tr);
8701                 } else {
8702                         tracer_tracing_off(tr);
8703                         if (tr->current_trace->stop)
8704                                 tr->current_trace->stop(tr);
8705                 }
8706                 mutex_unlock(&trace_types_lock);
8707         }
8708
8709         (*ppos)++;
8710
8711         return cnt;
8712 }
8713
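/*
 * File operations behind the per-instance "tracing_on" file (created
 * in init_tracer_tracefs() below).  Usage sketch from user space,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    (stop recording)
 *   echo 1 > /sys/kernel/tracing/tracing_on    (resume recording)
 *   cat /sys/kernel/tracing/tracing_on         (read the current state)
 */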
8714 static const struct file_operations rb_simple_fops = {
8715         .open           = tracing_open_generic_tr,
8716         .read           = rb_simple_read,
8717         .write          = rb_simple_write,
8718         .release        = tracing_release_generic_tr,
8719         .llseek         = default_llseek,
8720 };
8721
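/*
 * "buffer_percent" controls how full (in percent) the ring buffer must
 * be before blocked readers are woken up; see the wait_on_pipe() call
 * in tracing_buffers_splice_read() above.  0 means wake up as soon as
 * any data is available, 100 only when the buffer is completely full.
 */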
8722 static ssize_t
8723 buffer_percent_read(struct file *filp, char __user *ubuf,
8724                     size_t cnt, loff_t *ppos)
8725 {
8726         struct trace_array *tr = filp->private_data;
8727         char buf[64];
8728         int r;
8729
8730         r = tr->buffer_percent;
8731         r = sprintf(buf, "%d\n", r);
8732
8733         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8734 }
8735
8736 static ssize_t
8737 buffer_percent_write(struct file *filp, const char __user *ubuf,
8738                      size_t cnt, loff_t *ppos)
8739 {
8740         struct trace_array *tr = filp->private_data;
8741         unsigned long val;
8742         int ret;
8743
8744         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8745         if (ret)
8746                 return ret;
8747
8748         if (val > 100)
8749                 return -EINVAL;
8750
8751         tr->buffer_percent = val;
8752
8753         (*ppos)++;
8754
8755         return cnt;
8756 }
8757
8758 static const struct file_operations buffer_percent_fops = {
8759         .open           = tracing_open_generic_tr,
8760         .read           = buffer_percent_read,
8761         .write          = buffer_percent_write,
8762         .release        = tracing_release_generic_tr,
8763         .llseek         = default_llseek,
8764 };
8765
8766 static struct dentry *trace_instance_dir;
8767
8768 static void
8769 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8770
8771 static int
8772 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8773 {
8774         enum ring_buffer_flags rb_flags;
8775
8776         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8777
8778         buf->tr = tr;
8779
8780         buf->buffer = ring_buffer_alloc(size, rb_flags);
8781         if (!buf->buffer)
8782                 return -ENOMEM;
8783
8784         buf->data = alloc_percpu(struct trace_array_cpu);
8785         if (!buf->data) {
8786                 ring_buffer_free(buf->buffer);
8787                 buf->buffer = NULL;
8788                 return -ENOMEM;
8789         }
8790
8791         /* Allocate the first page for all buffers */
8792         set_buffer_entries(&tr->array_buffer,
8793                            ring_buffer_size(tr->array_buffer.buffer, 0));
8794
8795         return 0;
8796 }
8797
8798 static int allocate_trace_buffers(struct trace_array *tr, int size)
8799 {
8800         int ret;
8801
8802         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8803         if (ret)
8804                 return ret;
8805
8806 #ifdef CONFIG_TRACER_MAX_TRACE
8807         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8808                                     allocate_snapshot ? size : 1);
8809         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8810                 ring_buffer_free(tr->array_buffer.buffer);
8811                 tr->array_buffer.buffer = NULL;
8812                 free_percpu(tr->array_buffer.data);
8813                 tr->array_buffer.data = NULL;
8814                 return -ENOMEM;
8815         }
8816         tr->allocated_snapshot = allocate_snapshot;
8817
8818         /*
8819          * Only the top level trace array gets its snapshot allocated
8820          * from the kernel command line.
8821          */
8822         allocate_snapshot = false;
8823 #endif
8824
8825         return 0;
8826 }
8827
8828 static void free_trace_buffer(struct array_buffer *buf)
8829 {
8830         if (buf->buffer) {
8831                 ring_buffer_free(buf->buffer);
8832                 buf->buffer = NULL;
8833                 free_percpu(buf->data);
8834                 buf->data = NULL;
8835         }
8836 }
8837
8838 static void free_trace_buffers(struct trace_array *tr)
8839 {
8840         if (!tr)
8841                 return;
8842
8843         free_trace_buffer(&tr->array_buffer);
8844
8845 #ifdef CONFIG_TRACER_MAX_TRACE
8846         free_trace_buffer(&tr->max_buffer);
8847 #endif
8848 }
8849
8850 static void init_trace_flags_index(struct trace_array *tr)
8851 {
8852         int i;
8853
8854         /* Used by the trace options files */
8855         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8856                 tr->trace_flags_index[i] = i;
8857 }
8858
8859 static void __update_tracer_options(struct trace_array *tr)
8860 {
8861         struct tracer *t;
8862
8863         for (t = trace_types; t; t = t->next)
8864                 add_tracer_options(tr, t);
8865 }
8866
8867 static void update_tracer_options(struct trace_array *tr)
8868 {
8869         mutex_lock(&trace_types_lock);
8870         tracer_options_updated = true;
8871         __update_tracer_options(tr);
8872         mutex_unlock(&trace_types_lock);
8873 }
8874
8875 /* Must have trace_types_lock held */
8876 struct trace_array *trace_array_find(const char *instance)
8877 {
8878         struct trace_array *tr, *found = NULL;
8879
8880         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8881                 if (tr->name && strcmp(tr->name, instance) == 0) {
8882                         found = tr;
8883                         break;
8884                 }
8885         }
8886
8887         return found;
8888 }
8889
8890 struct trace_array *trace_array_find_get(const char *instance)
8891 {
8892         struct trace_array *tr;
8893
8894         mutex_lock(&trace_types_lock);
8895         tr = trace_array_find(instance);
8896         if (tr)
8897                 tr->ref++;
8898         mutex_unlock(&trace_types_lock);
8899
8900         return tr;
8901 }
8902
8903 static int trace_array_create_dir(struct trace_array *tr)
8904 {
8905         int ret;
8906
8907         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8908         if (!tr->dir)
8909                 return -EINVAL;
8910
8911         ret = event_trace_add_tracer(tr->dir, tr);
8912         if (ret) {
8913                 tracefs_remove(tr->dir);
8914                 return ret;
8915         }
8916
8917         init_tracer_tracefs(tr, tr->dir);
8918         __update_tracer_options(tr);
8919
8920         return ret;
8921 }
8922
8923 static struct trace_array *trace_array_create(const char *name)
8924 {
8925         struct trace_array *tr;
8926         int ret;
8927
8928         ret = -ENOMEM;
8929         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8930         if (!tr)
8931                 return ERR_PTR(ret);
8932
8933         tr->name = kstrdup(name, GFP_KERNEL);
8934         if (!tr->name)
8935                 goto out_free_tr;
8936
8937         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8938                 goto out_free_tr;
8939
8940         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
8941                 goto out_free_tr;
8942
8943         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8944
8945         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8946
8947         raw_spin_lock_init(&tr->start_lock);
8948
8949         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8950
8951         tr->current_trace = &nop_trace;
8952
8953         INIT_LIST_HEAD(&tr->systems);
8954         INIT_LIST_HEAD(&tr->events);
8955         INIT_LIST_HEAD(&tr->hist_vars);
8956         INIT_LIST_HEAD(&tr->err_log);
8957
8958         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8959                 goto out_free_tr;
8960
8961         if (ftrace_allocate_ftrace_ops(tr) < 0)
8962                 goto out_free_tr;
8963
8964         ftrace_init_trace_array(tr);
8965
8966         init_trace_flags_index(tr);
8967
8968         if (trace_instance_dir) {
8969                 ret = trace_array_create_dir(tr);
8970                 if (ret)
8971                         goto out_free_tr;
8972         } else
8973                 __trace_early_add_events(tr);
8974
8975         list_add(&tr->list, &ftrace_trace_arrays);
8976
8977         tr->ref++;
8978
8979         return tr;
8980
8981  out_free_tr:
8982         ftrace_free_ftrace_ops(tr);
8983         free_trace_buffers(tr);
8984         free_cpumask_var(tr->pipe_cpumask);
8985         free_cpumask_var(tr->tracing_cpumask);
8986         kfree(tr->name);
8987         kfree(tr);
8988
8989         return ERR_PTR(ret);
8990 }
8991
8992 static int instance_mkdir(const char *name)
8993 {
8994         struct trace_array *tr;
8995         int ret;
8996
8997         mutex_lock(&event_mutex);
8998         mutex_lock(&trace_types_lock);
8999
9000         ret = -EEXIST;
9001         if (trace_array_find(name))
9002                 goto out_unlock;
9003
9004         tr = trace_array_create(name);
9005
9006         ret = PTR_ERR_OR_ZERO(tr);
9007
9008 out_unlock:
9009         mutex_unlock(&trace_types_lock);
9010         mutex_unlock(&event_mutex);
9011         return ret;
9012 }
9013
9014 /**
9015  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9016  * @name: The name of the trace array to be looked up/created.
9017  *
9018  * Returns a pointer to the trace array with the given name, or
9019  * NULL if it cannot be created.
9020  *
9021  * NOTE: This function increments the reference counter associated with the
9022  * trace array returned. This makes sure it cannot be freed while in use.
9023  * Use trace_array_put() once the trace array is no longer needed.
9024  * If the trace_array is to be freed, trace_array_destroy() needs to
9025  * be called after the trace_array_put(), or simply let user space delete
9026  * it from the tracefs instances directory. But until the
9027  * trace_array_put() is called, user space cannot delete it.
9028  *
9029  */
9030 struct trace_array *trace_array_get_by_name(const char *name)
9031 {
9032         struct trace_array *tr;
9033
9034         mutex_lock(&event_mutex);
9035         mutex_lock(&trace_types_lock);
9036
9037         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9038                 if (tr->name && strcmp(tr->name, name) == 0)
9039                         goto out_unlock;
9040         }
9041
9042         tr = trace_array_create(name);
9043
9044         if (IS_ERR(tr))
9045                 tr = NULL;
9046 out_unlock:
9047         if (tr)
9048                 tr->ref++;
9049
9050         mutex_unlock(&trace_types_lock);
9051         mutex_unlock(&event_mutex);
9052         return tr;
9053 }
9054 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
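/*
 * Lifecycle sketch for a caller (the instance name is illustrative;
 * see the kernel-doc above for the reference counting rules):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (tr) {
 *           ... use tr ...
 *           trace_array_put(tr);
 *           trace_array_destroy(tr);    (only to remove the instance)
 *   }
 */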
9055
9056 static int __remove_instance(struct trace_array *tr)
9057 {
9058         int i;
9059
9060         /* Reference counter for a newly created trace array = 1. */
9061         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9062                 return -EBUSY;
9063
9064         list_del(&tr->list);
9065
9066         /* Disable all the flags that were enabled coming in */
9067         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9068                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9069                         set_tracer_flag(tr, 1 << i, 0);
9070         }
9071
9072         tracing_set_nop(tr);
9073         clear_ftrace_function_probes(tr);
9074         event_trace_del_tracer(tr);
9075         ftrace_clear_pids(tr);
9076         ftrace_destroy_function_files(tr);
9077         tracefs_remove(tr->dir);
9078         free_trace_buffers(tr);
9079         clear_tracing_err_log(tr);
9080
9081         for (i = 0; i < tr->nr_topts; i++) {
9082                 kfree(tr->topts[i].topts);
9083         }
9084         kfree(tr->topts);
9085
9086         free_cpumask_var(tr->pipe_cpumask);
9087         free_cpumask_var(tr->tracing_cpumask);
9088         kfree(tr->name);
9089         kfree(tr);
9090
9091         return 0;
9092 }
9093
9094 int trace_array_destroy(struct trace_array *this_tr)
9095 {
9096         struct trace_array *tr;
9097         int ret;
9098
9099         if (!this_tr)
9100                 return -EINVAL;
9101
9102         mutex_lock(&event_mutex);
9103         mutex_lock(&trace_types_lock);
9104
9105         ret = -ENODEV;
9106
9107         /* Make sure the trace array exists before destroying it. */
9108         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9109                 if (tr == this_tr) {
9110                         ret = __remove_instance(tr);
9111                         break;
9112                 }
9113         }
9114
9115         mutex_unlock(&trace_types_lock);
9116         mutex_unlock(&event_mutex);
9117
9118         return ret;
9119 }
9120 EXPORT_SYMBOL_GPL(trace_array_destroy);
9121
9122 static int instance_rmdir(const char *name)
9123 {
9124         struct trace_array *tr;
9125         int ret;
9126
9127         mutex_lock(&event_mutex);
9128         mutex_lock(&trace_types_lock);
9129
9130         ret = -ENODEV;
9131         tr = trace_array_find(name);
9132         if (tr)
9133                 ret = __remove_instance(tr);
9134
9135         mutex_unlock(&trace_types_lock);
9136         mutex_unlock(&event_mutex);
9137
9138         return ret;
9139 }
9140
9141 static __init void create_trace_instances(struct dentry *d_tracer)
9142 {
9143         struct trace_array *tr;
9144
9145         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9146                                                          instance_mkdir,
9147                                                          instance_rmdir);
9148         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9149                 return;
9150
9151         mutex_lock(&event_mutex);
9152         mutex_lock(&trace_types_lock);
9153
9154         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9155                 if (!tr->name)
9156                         continue;
9157                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9158                              "Failed to create instance directory\n"))
9159                         break;
9160         }
9161
9162         mutex_unlock(&trace_types_lock);
9163         mutex_unlock(&event_mutex);
9164 }
9165
9166 static void
9167 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9168 {
9169         struct trace_event_file *file;
9170         int cpu;
9171
9172         trace_create_file("available_tracers", 0444, d_tracer,
9173                         tr, &show_traces_fops);
9174
9175         trace_create_file("current_tracer", 0644, d_tracer,
9176                         tr, &set_tracer_fops);
9177
9178         trace_create_file("tracing_cpumask", 0644, d_tracer,
9179                           tr, &tracing_cpumask_fops);
9180
9181         trace_create_file("trace_options", 0644, d_tracer,
9182                           tr, &tracing_iter_fops);
9183
9184         trace_create_file("trace", 0644, d_tracer,
9185                           tr, &tracing_fops);
9186
9187         trace_create_file("trace_pipe", 0444, d_tracer,
9188                           tr, &tracing_pipe_fops);
9189
9190         trace_create_file("buffer_size_kb", 0644, d_tracer,
9191                           tr, &tracing_entries_fops);
9192
9193         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9194                           tr, &tracing_total_entries_fops);
9195
9196         trace_create_file("free_buffer", 0200, d_tracer,
9197                           tr, &tracing_free_buffer_fops);
9198
9199         trace_create_file("trace_marker", 0220, d_tracer,
9200                           tr, &tracing_mark_fops);
9201
9202         file = __find_event_file(tr, "ftrace", "print");
9203         if (file && file->dir)
9204                 trace_create_file("trigger", 0644, file->dir, file,
9205                                   &event_trigger_fops);
9206         tr->trace_marker_file = file;
9207
9208         trace_create_file("trace_marker_raw", 0220, d_tracer,
9209                           tr, &tracing_mark_raw_fops);
9210
9211         trace_create_file("trace_clock", 0644, d_tracer, tr,
9212                           &trace_clock_fops);
9213
9214         trace_create_file("tracing_on", 0644, d_tracer,
9215                           tr, &rb_simple_fops);
9216
9217         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9218                           &trace_time_stamp_mode_fops);
9219
9220         tr->buffer_percent = 50;
9221
9222         trace_create_file("buffer_percent", 0444, d_tracer,
9223                         tr, &buffer_percent_fops);
9224
9225         create_trace_options_dir(tr);
9226
9227 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9228         trace_create_maxlat_file(tr, d_tracer);
9229 #endif
9230
9231         if (ftrace_create_function_files(tr, d_tracer))
9232                 MEM_FAIL(1, "Could not allocate function filter files");
9233
9234 #ifdef CONFIG_TRACER_SNAPSHOT
9235         trace_create_file("snapshot", 0644, d_tracer,
9236                           tr, &snapshot_fops);
9237 #endif
9238
9239         trace_create_file("error_log", 0644, d_tracer,
9240                           tr, &tracing_err_log_fops);
9241
9242         for_each_tracing_cpu(cpu)
9243                 tracing_init_tracefs_percpu(tr, cpu);
9244
9245         ftrace_init_tracefs(tr, d_tracer);
9246 }
9247
9248 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9249 {
9250         struct vfsmount *mnt;
9251         struct file_system_type *type;
9252
9253         /*
9254          * To maintain backward compatibility for tools that mount
9255          * debugfs to get to the tracing facility, tracefs is automatically
9256          * mounted to the debugfs/tracing directory.
9257          */
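        /*
         * Concretely (typical paths): the same files are then reachable
         * at both /sys/kernel/tracing/ (a direct tracefs mount) and
         * /sys/kernel/debug/tracing/ (this automount), assuming debugfs
         * is mounted at /sys/kernel/debug.
         */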
9258         type = get_fs_type("tracefs");
9259         if (!type)
9260                 return NULL;
9261         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9262         put_filesystem(type);
9263         if (IS_ERR(mnt))
9264                 return NULL;
9265         mntget(mnt);
9266
9267         return mnt;
9268 }
9269
9270 /**
9271  * tracing_init_dentry - initialize top level trace array
9272  *
9273  * This is called when creating files or directories in the tracing
9274  * directory. It is called via fs_initcall() by any of the boot up code
9275  * and returns zero once the top level tracing directory is set up.
9276  */
9277 int tracing_init_dentry(void)
9278 {
9279         struct trace_array *tr = &global_trace;
9280
9281         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9282                 pr_warn("Tracing disabled due to lockdown\n");
9283                 return -EPERM;
9284         }
9285
9286         /* The top level trace array uses NULL as the parent */
9287         if (tr->dir)
9288                 return 0;
9289
9290         if (WARN_ON(!tracefs_initialized()))
9291                 return -ENODEV;
9292
9293         /*
9294          * As there may still be users that expect the tracing
9295          * files to exist in debugfs/tracing, we must automount
9296          * the tracefs file system there, so older tools still
9297          * work with the newer kernel.
9298          */
9299         tr->dir = debugfs_create_automount("tracing", NULL,
9300                                            trace_automount, NULL);
9301
9302         return 0;
9303 }
9304
9305 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9306 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9307
9308 static void __init trace_eval_init(void)
9309 {
9310         int len;
9311
9312         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9313         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9314 }
9315
9316 #ifdef CONFIG_MODULES
9317 static void trace_module_add_evals(struct module *mod)
9318 {
9319         if (!mod->num_trace_evals)
9320                 return;
9321
9322         /*
9323          * Modules with bad taint do not have events created, do
9324          * not bother with enums either.
9325          */
9326         if (trace_module_has_bad_taint(mod))
9327                 return;
9328
9329         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9330 }
9331
9332 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9333 static void trace_module_remove_evals(struct module *mod)
9334 {
9335         union trace_eval_map_item *map;
9336         union trace_eval_map_item **last = &trace_eval_maps;
9337
9338         if (!mod->num_trace_evals)
9339                 return;
9340
9341         mutex_lock(&trace_eval_mutex);
9342
9343         map = trace_eval_maps;
9344
9345         while (map) {
9346                 if (map->head.mod == mod)
9347                         break;
9348                 map = trace_eval_jmp_to_tail(map);
9349                 last = &map->tail.next;
9350                 map = map->tail.next;
9351         }
9352         if (!map)
9353                 goto out;
9354
9355         *last = trace_eval_jmp_to_tail(map)->tail.next;
9356         kfree(map);
9357  out:
9358         mutex_unlock(&trace_eval_mutex);
9359 }
9360 #else
9361 static inline void trace_module_remove_evals(struct module *mod) { }
9362 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9363
9364 static int trace_module_notify(struct notifier_block *self,
9365                                unsigned long val, void *data)
9366 {
9367         struct module *mod = data;
9368
9369         switch (val) {
9370         case MODULE_STATE_COMING:
9371                 trace_module_add_evals(mod);
9372                 break;
9373         case MODULE_STATE_GOING:
9374                 trace_module_remove_evals(mod);
9375                 break;
9376         }
9377
9378         return NOTIFY_OK;
9379 }
9380
9381 static struct notifier_block trace_module_nb = {
9382         .notifier_call = trace_module_notify,
9383         .priority = 0,
9384 };
9385 #endif /* CONFIG_MODULES */
9386
9387 static __init int tracer_init_tracefs(void)
9388 {
9389         int ret;
9390
9391         trace_access_lock_init();
9392
9393         ret = tracing_init_dentry();
9394         if (ret)
9395                 return 0;
9396
9397         event_trace_init();
9398
9399         init_tracer_tracefs(&global_trace, NULL);
9400         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9401
9402         trace_create_file("tracing_thresh", 0644, NULL,
9403                         &global_trace, &tracing_thresh_fops);
9404
9405         trace_create_file("README", 0444, NULL,
9406                         NULL, &tracing_readme_fops);
9407
9408         trace_create_file("saved_cmdlines", 0444, NULL,
9409                         NULL, &tracing_saved_cmdlines_fops);
9410
9411         trace_create_file("saved_cmdlines_size", 0644, NULL,
9412                           NULL, &tracing_saved_cmdlines_size_fops);
9413
9414         trace_create_file("saved_tgids", 0444, NULL,
9415                         NULL, &tracing_saved_tgids_fops);
9416
9417         trace_eval_init();
9418
9419         trace_create_eval_file(NULL);
9420
9421 #ifdef CONFIG_MODULES
9422         register_module_notifier(&trace_module_nb);
9423 #endif
9424
9425 #ifdef CONFIG_DYNAMIC_FTRACE
9426         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9427                         NULL, &tracing_dyn_info_fops);
9428 #endif
9429
9430         create_trace_instances(NULL);
9431
9432         update_tracer_options(&global_trace);
9433
9434         return 0;
9435 }
9436
9437 static int trace_panic_handler(struct notifier_block *this,
9438                                unsigned long event, void *unused)
9439 {
9440         if (ftrace_dump_on_oops)
9441                 ftrace_dump(ftrace_dump_on_oops);
9442         return NOTIFY_OK;
9443 }
9444
9445 static struct notifier_block trace_panic_notifier = {
9446         .notifier_call  = trace_panic_handler,
9447         .next           = NULL,
9448         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9449 };
9450
9451 static int trace_die_handler(struct notifier_block *self,
9452                              unsigned long val,
9453                              void *data)
9454 {
9455         switch (val) {
9456         case DIE_OOPS:
9457                 if (ftrace_dump_on_oops)
9458                         ftrace_dump(ftrace_dump_on_oops);
9459                 break;
9460         default:
9461                 break;
9462         }
9463         return NOTIFY_OK;
9464 }
9465
9466 static struct notifier_block trace_die_notifier = {
9467         .notifier_call = trace_die_handler,
9468         .priority = 200
9469 };
9470
9471 /*
9472  * printk is set to a max of 1024; we really don't need it that big.
9473  * Nothing should be printing 1000 characters anyway.
9474  */
9475 #define TRACE_MAX_PRINT         1000
9476
9477 /*
9478  * Define here KERN_TRACE so that we have one place to modify
9479  * it if we decide to change what log level the ftrace dump
9480  * should be at.
9481  */
9482 #define KERN_TRACE              KERN_EMERG
9483
9484 void
9485 trace_printk_seq(struct trace_seq *s)
9486 {
9487         /* Probably should print a warning here. */
9488         if (s->seq.len >= TRACE_MAX_PRINT)
9489                 s->seq.len = TRACE_MAX_PRINT;
9490
9491         /*
9492          * More paranoid code. Although the buffer size is set to
9493          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9494          * an extra layer of protection.
9495          */
9496         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9497                 s->seq.len = s->seq.size - 1;
9498
9499         /* should be zero terminated, but we are paranoid. */
9500         s->buffer[s->seq.len] = 0;
9501
9502         printk(KERN_TRACE "%s", s->buffer);
9503
9504         trace_seq_init(s);
9505 }
9506
9507 void trace_init_global_iter(struct trace_iterator *iter)
9508 {
9509         iter->tr = &global_trace;
9510         iter->trace = iter->tr->current_trace;
9511         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9512         iter->array_buffer = &global_trace.array_buffer;
9513
9514         if (iter->trace && iter->trace->open)
9515                 iter->trace->open(iter);
9516
9517         /* Annotate start of buffers if we had overruns */
9518         if (ring_buffer_overruns(iter->array_buffer->buffer))
9519                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9520
9521         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9522         if (trace_clocks[iter->tr->clock_id].in_ns)
9523                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9524
9525         /* Cannot use kmalloc for iter.temp and iter.fmt */
9526         iter->temp = static_temp_buf;
9527         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9528         iter->fmt = static_fmt_buf;
9529         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9530 }
9531
9532 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9533 {
9534         /* use static because iter can be a bit big for the stack */
9535         static struct trace_iterator iter;
9536         static atomic_t dump_running;
9537         struct trace_array *tr = &global_trace;
9538         unsigned int old_userobj;
9539         unsigned long flags;
9540         int cnt = 0, cpu;
9541
9542         /* Only allow one dump user at a time. */
9543         if (atomic_inc_return(&dump_running) != 1) {
9544                 atomic_dec(&dump_running);
9545                 return;
9546         }
9547
9548         /*
9549          * Always turn off tracing when we dump.
9550          * We don't need to show trace output of what happens
9551          * between multiple crashes.
9552          *
9553          * If the user does a sysrq-z, then they can re-enable
9554          * tracing with echo 1 > tracing_on.
9555          */
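        /*
         * (The dump itself is typically requested via the
         * ftrace_dump_on_oops command-line option or sysctl handled by
         * the panic/die notifiers above, or manually with sysrq-z.)
         */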
9556         tracing_off();
9557
9558         local_irq_save(flags);
9559         printk_nmi_direct_enter();
9560
9561         /* Simulate the iterator */
9562         trace_init_global_iter(&iter);
9563
9564         for_each_tracing_cpu(cpu) {
9565                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9566         }
9567
9568         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9569
9570         /* don't look at user memory in panic mode */
9571         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9572
9573         switch (oops_dump_mode) {
9574         case DUMP_ALL:
9575                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9576                 break;
9577         case DUMP_ORIG:
9578                 iter.cpu_file = raw_smp_processor_id();
9579                 break;
9580         case DUMP_NONE:
9581                 goto out_enable;
9582         default:
9583                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9584                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9585         }
9586
9587         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9588
9589         /* Did function tracer already get disabled? */
9590         if (ftrace_is_dead()) {
9591                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9592                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9593         }
9594
9595         /*
9596          * We need to stop all tracing on all CPUs to read
9597          * the next buffer. This is a bit expensive, but is
9598          * not done often. We print everything we can read,
9599          * and then release the locks again.
9600          */
9601
9602         while (!trace_empty(&iter)) {
9603
9604                 if (!cnt)
9605                         printk(KERN_TRACE "---------------------------------\n");
9606
9607                 cnt++;
9608
9609                 trace_iterator_reset(&iter);
9610                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9611
9612                 if (trace_find_next_entry_inc(&iter) != NULL) {
9613                         int ret;
9614
9615                         ret = print_trace_line(&iter);
9616                         if (ret != TRACE_TYPE_NO_CONSUME)
9617                                 trace_consume(&iter);
9618                 }
9619                 touch_nmi_watchdog();
9620
9621                 trace_printk_seq(&iter.seq);
9622         }
9623
9624         if (!cnt)
9625                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9626         else
9627                 printk(KERN_TRACE "---------------------------------\n");
9628
9629  out_enable:
9630         tr->trace_flags |= old_userobj;
9631
9632         for_each_tracing_cpu(cpu) {
9633                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9634         }
9635         atomic_dec(&dump_running);
9636         printk_nmi_direct_exit();
9637         local_irq_restore(flags);
9638 }
9639 EXPORT_SYMBOL_GPL(ftrace_dump);
9640
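/*
 * trace_run_command - split a command string and hand it to a callback
 *
 * @buf is split into whitespace-separated words with argv_split().  If
 * at least one word is present, @createfn is invoked as
 * createfn(argc, argv) and its return value is returned.  An empty
 * buffer returns 0; an allocation failure returns -ENOMEM.
 *
 * Sketch of a caller (the callback below is made up purely for
 * illustration):
 *
 *	static int my_create(int argc, char **argv)
 *	{
 *		// argv[0] is the command name, argv[1..argc-1] its arguments
 *		return 0;
 *	}
 *
 *	err = trace_run_command("myevent arg1 arg2", my_create);
 */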
9641 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9642 {
9643         char **argv;
9644         int argc, ret;
9645
9646         argc = 0;
9647         ret = 0;
9648         argv = argv_split(GFP_KERNEL, buf, &argc);
9649         if (!argv)
9650                 return -ENOMEM;
9651
9652         if (argc)
9653                 ret = createfn(argc, argv);
9654
9655         argv_free(argv);
9656
9657         return ret;
9658 }
9659
9660 #define WRITE_BUFSIZE  4096
9661
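/*
 * trace_parse_run_command - parse a user-space write into commands
 *
 * Helper for files that accept newline-separated commands from user
 * space (the dynamic event interfaces, for example).  The user buffer
 * is copied in chunks of up to WRITE_BUFSIZE - 1 bytes, split on '\n',
 * anything following a '#' is stripped as a comment, and each complete
 * line is passed to trace_run_command() together with @createfn.  A
 * line that does not end within the current chunk is re-read together
 * with the next chunk; a single line must fit in WRITE_BUFSIZE - 2
 * characters or -EINVAL is returned.  On success the number of bytes
 * consumed is returned.
 */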
9662 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9663                                 size_t count, loff_t *ppos,
9664                                 int (*createfn)(int, char **))
9665 {
9666         char *kbuf, *buf, *tmp;
9667         int ret = 0;
9668         size_t done = 0;
9669         size_t size;
9670
9671         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9672         if (!kbuf)
9673                 return -ENOMEM;
9674
9675         while (done < count) {
9676                 size = count - done;
9677
9678                 if (size >= WRITE_BUFSIZE)
9679                         size = WRITE_BUFSIZE - 1;
9680
9681                 if (copy_from_user(kbuf, buffer + done, size)) {
9682                         ret = -EFAULT;
9683                         goto out;
9684                 }
9685                 kbuf[size] = '\0';
9686                 buf = kbuf;
9687                 do {
9688                         tmp = strchr(buf, '\n');
9689                         if (tmp) {
9690                                 *tmp = '\0';
9691                                 size = tmp - buf + 1;
9692                         } else {
9693                                 size = strlen(buf);
9694                                 if (done + size < count) {
9695                                         if (buf != kbuf)
9696                                                 break;
9697                         /* A single line can be at most WRITE_BUFSIZE - 2 chars ('\n' + '\0') */
9698                                         pr_warn("Line length is too long: Should be less than %d\n",
9699                                                 WRITE_BUFSIZE - 2);
9700                                         ret = -EINVAL;
9701                                         goto out;
9702                                 }
9703                         }
9704                         done += size;
9705
9706                         /* Remove comments */
9707                         tmp = strchr(buf, '#');
9708
9709                         if (tmp)
9710                                 *tmp = '\0';
9711
9712                         ret = trace_run_command(buf, createfn);
9713                         if (ret)
9714                                 goto out;
9715                         buf += size;
9716
9717                 } while (done < count);
9718         }
9719         ret = done;
9720
9721 out:
9722         kfree(kbuf);
9723
9724         return ret;
9725 }
9726
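/*
 * Allocate everything the tracing core needs at boot: the cpumasks,
 * the temporary buffer used by event triggers, the saved-cmdlines
 * buffer and the global trace buffer itself.  This also installs the
 * nop tracer as the bootstrap current_trace and registers the panic
 * and die notifiers so the buffer can be dumped on a crash.
 */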
9727 __init static int tracer_alloc_buffers(void)
9728 {
9729         int ring_buf_size;
9730         int ret = -ENOMEM;
9731
9732
9733         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9734                 pr_warn("Tracing disabled due to lockdown\n");
9735                 return -EPERM;
9736         }
9737
9738         /*
9739          * Make sure we don't accidentally add more trace options
9740          * than we have bits for.
9741          */
9742         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9743
9744         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9745                 goto out;
9746
9747         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9748                 goto out_free_buffer_mask;
9749
9750         /* Only allocate trace_printk buffers if a trace_printk exists */
9751         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9752                 /* Must be called before global_trace.buffer is allocated */
9753                 trace_printk_init_buffers();
9754
9755         /* To save memory, keep the ring buffer at its minimum size */
9756         if (ring_buffer_expanded)
9757                 ring_buf_size = trace_buf_size;
9758         else
9759                 ring_buf_size = 1;
9760
9761         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9762         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9763
9764         raw_spin_lock_init(&global_trace.start_lock);
9765
9766         /*
9767          * The prepare callback allocates some memory for the ring buffer. We
9768          * don't free the buffer if the CPU goes down. If we were to free
9769          * the buffer, then the user would lose any trace that was in the
9770          * buffer. The memory will be removed once the "instance" is removed.
9771          */
9772         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9773                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9774                                       NULL);
9775         if (ret < 0)
9776                 goto out_free_cpumask;
9777         /* Used for event triggers */
9778         ret = -ENOMEM;
9779         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9780         if (!temp_buffer)
9781                 goto out_rm_hp_state;
9782
9783         if (trace_create_savedcmd() < 0)
9784                 goto out_free_temp_buffer;
9785
9786         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
9787                 goto out_free_savedcmd;
9788
9789         /* TODO: make the number of buffers hot pluggable with CPUs */
9790         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9791                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9792                 goto out_free_pipe_cpumask;
9793         }
9794         if (global_trace.buffer_disabled)
9795                 tracing_off();
9796
9797         if (trace_boot_clock) {
9798                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9799                 if (ret < 0)
9800                         pr_warn("Trace clock %s not defined, going back to default\n",
9801                                 trace_boot_clock);
9802         }
9803
9804         /*
9805          * register_tracer() might reference current_trace, so it
9806          * needs to be set before we register anything. This is
9807          * just a bootstrap of current_trace anyway.
9808          */
9809         global_trace.current_trace = &nop_trace;
9810
9811         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9812
9813         ftrace_init_global_array_ops(&global_trace);
9814
9815         init_trace_flags_index(&global_trace);
9816
9817         register_tracer(&nop_trace);
9818
9819         /* Function tracing may start here (via kernel command line) */
9820         init_function_trace();
9821
9822         /* All seems OK, enable tracing */
9823         tracing_disabled = 0;
9824
9825         atomic_notifier_chain_register(&panic_notifier_list,
9826                                        &trace_panic_notifier);
9827
9828         register_die_notifier(&trace_die_notifier);
9829
9830         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9831
9832         INIT_LIST_HEAD(&global_trace.systems);
9833         INIT_LIST_HEAD(&global_trace.events);
9834         INIT_LIST_HEAD(&global_trace.hist_vars);
9835         INIT_LIST_HEAD(&global_trace.err_log);
9836         list_add(&global_trace.list, &ftrace_trace_arrays);
9837
9838         apply_trace_boot_options();
9839
9840         register_snapshot_cmd();
9841
9842         return 0;
9843
9844 out_free_pipe_cpumask:
9845         free_cpumask_var(global_trace.pipe_cpumask);
9846 out_free_savedcmd:
9847         free_saved_cmdlines_buffer(savedcmd);
9848 out_free_temp_buffer:
9849         ring_buffer_free(temp_buffer);
9850 out_rm_hp_state:
9851         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9852 out_free_cpumask:
9853         free_cpumask_var(global_trace.tracing_cpumask);
9854 out_free_buffer_mask:
9855         free_cpumask_var(tracing_buffer_mask);
9856 out:
9857         return ret;
9858 }
9859
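/*
 * Tracing is brought up in two steps: early_trace_init() allocates the
 * ring buffers (and the tracepoint_print_iter when tracepoint_printk
 * is set) early in boot, while trace_init() is called later to
 * initialize the trace events.
 */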
9860 void __init early_trace_init(void)
9861 {
9862         if (tracepoint_printk) {
9863                 tracepoint_print_iter =
9864                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9865                 if (MEM_FAIL(!tracepoint_print_iter,
9866                              "Failed to allocate trace iterator\n"))
9867                         tracepoint_printk = 0;
9868                 else
9869                         static_key_enable(&tracepoint_printk_key.key);
9870         }
9871         tracer_alloc_buffers();
9872
9873         init_events();
9874 }
9875
9876 void __init trace_init(void)
9877 {
9878         trace_event_init();
9879 }
9880
9881 __init static int clear_boot_tracer(void)
9882 {
9883         /*
9884          * The default bootup tracer name lives in an init section
9885          * that is freed after boot. This function runs at late_initcall
9886          * time: if the boot tracer was never registered, clear the
9887          * pointer so that a later registration cannot access memory
9888          * that is about to be freed.
9889          */
9890         if (!default_bootup_tracer)
9891                 return 0;
9892
9893         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9894                default_bootup_tracer);
9895         default_bootup_tracer = NULL;
9896
9897         return 0;
9898 }
9899
9900 fs_initcall(tracer_init_tracefs);
9901 late_initcall_sync(clear_boot_tracer);
9902
9903 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9904 __init static int tracing_set_default_clock(void)
9905 {
9906         /* sched_clock_stable() is determined in late_initcall */
9907         if (!trace_boot_clock && !sched_clock_stable()) {
9908                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9909                         pr_warn("Can not set tracing clock due to lockdown\n");
9910                         return -EPERM;
9911                 }
9912
9913                 printk(KERN_WARNING
9914                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9915                        "If you want to keep using the local clock, then add:\n"
9916                        "  \"trace_clock=local\"\n"
9917                        "on the kernel command line\n");
9918                 tracing_set_clock(&global_trace, "global");
9919         }
9920
9921         return 0;
9922 }
9923 late_initcall_sync(tracing_set_default_clock);
9924 #endif