1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
53 #include "trace_output.h"
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
59 bool ring_buffer_expanded;
62 * We need to change this state when a selftest is running.
63 * A selftest will look into the ring-buffer to count the
64 * entries inserted during the selftest, although concurrent
65 * insertions into the ring-buffer, such as trace_printk, could occur
66 * at the same time, giving false positive or negative results.
68 static bool __read_mostly tracing_selftest_running;
71 * If boot-time tracing, including tracers/events via the kernel cmdline,
72 * is running, we do not want to run SELFTEST.
74 bool __read_mostly tracing_selftest_disabled;
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets
115 static int tracing_disabled = 1;
117 cpumask_var_t __read_mostly tracing_buffer_mask;
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
125 * capturing traces that lead to crashes and outputting them to a
128 * It is off by default, but you can enable it either by specifying
129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
130 * /proc/sys/kernel/ftrace_dump_on_oops
131 * Set 1 if you want to dump buffers of all CPUs
132 * Set 2 if you want to dump the buffer of the CPU that triggered oops
135 enum ftrace_dump_mode ftrace_dump_on_oops;
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
144 unsigned long length;
147 union trace_eval_map_item;
149 struct trace_eval_map_tail {
151 * "end" is first and points to NULL as it must be different
152 * from "mod" or "eval_string"
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
158 static DEFINE_MUTEX(trace_eval_mutex);
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
167 union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned int trace_ctx);
181 #define MAX_TRACER_SIZE 100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
185 static bool allocate_snapshot;
187 static int __init set_cmdline_ftrace(char *str)
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
195 __setup("ftrace=", set_cmdline_ftrace);
197 static int __init set_ftrace_dump_on_oops(char *str)
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213 static int __init stop_trace_on_warning(char *str)
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
219 __setup("traceoff_on_warning", stop_trace_on_warning);
221 static int __init boot_alloc_snapshot(char *str)
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
228 __setup("alloc_snapshot", boot_alloc_snapshot);
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233 static int __init set_trace_boot_options(char *str)
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 __setup("trace_options=", set_trace_boot_options);
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
243 static int __init set_trace_boot_clock(char *str)
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
249 __setup("trace_clock=", set_trace_boot_clock);
251 static int __init set_tracepoint_printk(char *str)
253 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 tracepoint_printk = 1;
257 __setup("tp_printk", set_tracepoint_printk);
259 unsigned long long ns2usecs(u64 nsec)
267 trace_process_export(struct trace_export *export,
268 struct ring_buffer_event *event, int flag)
270 struct trace_entry *entry;
271 unsigned int size = 0;
273 if (export->flags & flag) {
274 entry = ring_buffer_event_data(event);
275 size = ring_buffer_event_length(event);
276 export->write(export, entry, size);
280 static DEFINE_MUTEX(ftrace_export_lock);
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
288 static inline void ftrace_exports_enable(struct trace_export *export)
290 if (export->flags & TRACE_EXPORT_FUNCTION)
291 static_branch_inc(&trace_function_exports_enabled);
293 if (export->flags & TRACE_EXPORT_EVENT)
294 static_branch_inc(&trace_event_exports_enabled);
296 if (export->flags & TRACE_EXPORT_MARKER)
297 static_branch_inc(&trace_marker_exports_enabled);
300 static inline void ftrace_exports_disable(struct trace_export *export)
302 if (export->flags & TRACE_EXPORT_FUNCTION)
303 static_branch_dec(&trace_function_exports_enabled);
305 if (export->flags & TRACE_EXPORT_EVENT)
306 static_branch_dec(&trace_event_exports_enabled);
308 if (export->flags & TRACE_EXPORT_MARKER)
309 static_branch_dec(&trace_marker_exports_enabled);
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
314 struct trace_export *export;
316 preempt_disable_notrace();
318 export = rcu_dereference_raw_check(ftrace_exports_list);
320 trace_process_export(export, event, flag);
321 export = rcu_dereference_raw_check(export->next);
324 preempt_enable_notrace();
328 add_trace_export(struct trace_export **list, struct trace_export *export)
330 rcu_assign_pointer(export->next, *list);
332 * We are inserting export into the list, but another
333 * CPU might be walking that list. We need to make sure
334 * the export->next pointer is valid before another CPU sees
335 * the export pointer included in the list.
337 rcu_assign_pointer(*list, export);
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
343 struct trace_export **p;
345 for (p = list; *p != NULL; p = &(*p)->next)
352 rcu_assign_pointer(*p, (*p)->next);
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
360 ftrace_exports_enable(export);
362 add_trace_export(list, export);
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
370 ret = rm_trace_export(list, export);
371 ftrace_exports_disable(export);
376 int register_ftrace_export(struct trace_export *export)
378 if (WARN_ON_ONCE(!export->write))
381 mutex_lock(&ftrace_export_lock);
383 add_ftrace_export(&ftrace_exports_list, export);
385 mutex_unlock(&ftrace_export_lock);
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
391 int unregister_ftrace_export(struct trace_export *export)
395 mutex_lock(&ftrace_export_lock);
397 ret = rm_ftrace_export(&ftrace_exports_list, export);
399 mutex_unlock(&ftrace_export_lock);
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
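
/*
 * Illustrative sketch (hypothetical names): how an external consumer might
 * hook the export list above.  The write() callback follows the prototype
 * in struct trace_export from <linux/trace.h>.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push the raw trace entry to an out-of-band sink here. */
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * A module would pair register_ftrace_export(&example_export) in its init
 * path with unregister_ftrace_export(&example_export) on teardown.
 */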
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS \
407 (FUNCTION_DEFAULT_FLAGS | \
408 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
409 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
410 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
411 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
416 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
423 * The global_trace is the descriptor that holds the top-level tracing
424 * buffers for the live tracing.
426 static struct trace_array global_trace = {
427 .trace_flags = TRACE_DEFAULT_FLAGS,
430 LIST_HEAD(ftrace_trace_arrays);
432 int trace_array_get(struct trace_array *this_tr)
434 struct trace_array *tr;
437 mutex_lock(&trace_types_lock);
438 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
445 mutex_unlock(&trace_types_lock);
450 static void __trace_array_put(struct trace_array *this_tr)
452 WARN_ON(!this_tr->ref);
457 * trace_array_put - Decrement the reference counter for this trace array.
458 * @this_tr : pointer to the trace array
460 * NOTE: Use this when we no longer need the trace array returned by
461 * trace_array_get_by_name(). This ensures the trace array can be later
465 void trace_array_put(struct trace_array *this_tr)
470 mutex_lock(&trace_types_lock);
471 __trace_array_put(this_tr);
472 mutex_unlock(&trace_types_lock);
474 EXPORT_SYMBOL_GPL(trace_array_put);
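
/*
 * Illustrative sketch (hypothetical instance name): the get/put pairing
 * described above, as a kernel-module style user might write it.
 */
static void __maybe_unused example_use_trace_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_instance"); /* takes a reference */
	if (!tr)
		return;

	trace_array_printk(tr, _THIS_IP_, "hello from the example\n");

	trace_array_put(tr);	/* drop the reference when done */
}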
476 int tracing_check_open_get_tr(struct trace_array *tr)
480 ret = security_locked_down(LOCKDOWN_TRACEFS);
484 if (tracing_disabled)
487 if (tr && trace_array_get(tr) < 0)
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 struct trace_buffer *buffer,
495 struct ring_buffer_event *event)
497 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 !filter_match_preds(call->filter, rec)) {
499 __trace_event_discard_commit(buffer, event);
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
508 vfree(pid_list->pids);
513 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514 * @filtered_pids: The list of pids to check
515 * @search_pid: The PID to find in @filtered_pids
517 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 * If pid_max changed after filtered_pids was created, we
524 * by default ignore all pids greater than the previous pid_max.
526 if (search_pid >= filtered_pids->pid_max)
529 return test_bit(search_pid, filtered_pids->pids);
533 * trace_ignore_this_task - should a task be ignored for tracing
534 * @filtered_pids: The list of pids to check
535 * @filtered_no_pids: The list of pids not to be traced
536 * @task: The task that should be ignored if not filtered
538 * Checks if @task should be traced or not from @filtered_pids.
539 * Returns true if @task should *NOT* be traced.
540 * Returns false if @task should be traced.
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 struct trace_pid_list *filtered_no_pids,
545 struct task_struct *task)
548 * If filtered_no_pids is not empty, and the task's pid is listed
549 * in filtered_no_pids, then return true.
550 * Otherwise, if filtered_pids is empty, that means we can
551 * trace all tasks. If it has content, then only trace pids
552 * within filtered_pids.
555 return (filtered_pids &&
556 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 trace_find_filtered_pid(filtered_no_pids, task->pid));
562 * trace_filter_add_remove_task - Add or remove a task from a pid_list
563 * @pid_list: The list to modify
564 * @self: The current task for fork or NULL for exit
565 * @task: The task to add or remove
567 * If adding a task, if @self is defined, the task is only added if @self
568 * is also included in @pid_list. This happens on fork and tasks should
569 * only be added when the parent is listed. If @self is NULL, then the
570 * @task pid will be removed from the list, which would happen on exit
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 struct task_struct *self,
575 struct task_struct *task)
580 /* For forks, we only add if the forking task is listed */
582 if (!trace_find_filtered_pid(pid_list, self->pid))
586 /* Sorry, but we don't support pid_max changing after setting */
587 if (task->pid >= pid_list->pid_max)
590 /* "self" is set for forks, and NULL for exits */
592 set_bit(task->pid, pid_list->pids);
594 clear_bit(task->pid, pid_list->pids);
598 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599 * @pid_list: The pid list to show
600 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601 * @pos: The position of the file
603 * This is used by the seq_file "next" operation to iterate the pids
604 * listed in a trace_pid_list structure.
606 * Returns the pid+1 as we want to display pid of zero, but NULL would
607 * stop the iteration.
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 unsigned long pid = (unsigned long)v;
615 /* pid already is +1 of the actual previous bit */
616 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618 /* Return pid + 1 to allow zero to be represented */
619 if (pid < pid_list->pid_max)
620 return (void *)(pid + 1);
626 * trace_pid_start - Used for seq_file to start reading pid lists
627 * @pid_list: The pid list to show
628 * @pos: The position of the file
630 * This is used by seq_file "start" operation to start the iteration
633 * Returns the pid+1 as we want to display pid of zero, but NULL would
634 * stop the iteration.
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
641 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 if (pid >= pid_list->pid_max)
645 /* Return pid + 1 so that zero can be the exit value */
646 for (pid++; pid && l < *pos;
647 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
653 * trace_pid_show - show the current pid in seq_file processing
654 * @m: The seq_file structure to write into
655 * @v: A void pointer of the pid (+1) value to display
657 * Can be directly used by seq_file operations to display the current
660 int trace_pid_show(struct seq_file *m, void *v)
662 unsigned long pid = (unsigned long)v - 1;
664 seq_printf(m, "%lu\n", pid);
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE 127
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 struct trace_pid_list **new_pid_list,
673 const char __user *ubuf, size_t cnt)
675 struct trace_pid_list *pid_list;
676 struct trace_parser parser;
684 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
688 * The write is an all or nothing operation: always create a new
689 * array when the user adds new pids. If the operation fails, then
690 * the current list is not modified.
693 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 trace_parser_put(&parser);
699 pid_list->pid_max = READ_ONCE(pid_max);
701 /* Only truncating will shrink pid_max */
702 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 pid_list->pid_max = filtered_pids->pid_max;
705 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 if (!pid_list->pids) {
707 trace_parser_put(&parser);
713 /* copy the current bits to the new max */
714 for_each_set_bit(pid, filtered_pids->pids,
715 filtered_pids->pid_max) {
716 set_bit(pid, pid_list->pids);
725 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 if (ret < 0 || !trace_parser_loaded(&parser))
734 if (kstrtoul(parser.buffer, 0, &val))
736 if (val >= pid_list->pid_max)
741 set_bit(pid, pid_list->pids);
744 trace_parser_clear(&parser);
747 trace_parser_put(&parser);
750 trace_free_pid_list(pid_list);
755 /* Cleared the list of pids */
756 trace_free_pid_list(pid_list);
761 *new_pid_list = pid_list;
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
770 /* Early boot up does not have a buffer yet */
772 return trace_clock_local();
774 ts = ring_buffer_time_stamp(buf->buffer);
775 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
780 u64 ftrace_now(int cpu)
782 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
786 * tracing_is_enabled - Show if global_trace has been enabled
788 * Shows if the global trace has been enabled or not. It uses the
789 * mirror flag "buffer_disabled" so it can be used in fast paths such as for
790 * the irqsoff tracer. But it may be inaccurate due to races. If you
791 * need to know the accurate state, use tracing_is_on() which is a little
792 * slower, but accurate.
794 int tracing_is_enabled(void)
797 * For quick access (irqsoff uses this in fast path), just
798 * return the mirror variable of the state of the ring buffer.
799 * It's a little racy, but we don't really care.
802 return !global_trace.buffer_disabled;
806 * trace_buf_size is the size in bytes that is allocated
807 * for a buffer. Note, the number of bytes is always rounded
810 * This number is purposely set to a low number of 16384.
811 * If the dump on oops happens, it will be much appreciated
812 * not to have to wait for all that output. Anyway, this is
813 * configurable at both boot time and run time.
815 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
817 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 /* trace_types holds a linked list of available tracers. */
820 static struct tracer *trace_types __read_mostly;
823 * trace_types_lock is used to protect the trace_types list.
825 DEFINE_MUTEX(trace_types_lock);
828 * serialize the access of the ring buffer
830 * The ring buffer serializes readers, but that is only low level protection.
831 * The validity of the events (as returned by ring_buffer_peek() etc.)
832 * is not protected by the ring buffer.
834 * The content of events may become garbage if we allow another process to
835 * consume these events concurrently:
836 * A) the page of the consumed events may become a normal page
837 * (not a reader page) in the ring buffer, and this page will be rewritten
838 * by the event producer.
839 * B) The page of the consumed events may become a page for splice_read,
840 * and this page will be returned to the system.
842 * These primitives allow multi-process access to different cpu ring buffers
845 * These primitives don't distinguish read-only and read-consume access.
846 * Multiple read-only accesses are also serialized.
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 static inline void trace_access_lock(int cpu)
855 if (cpu == RING_BUFFER_ALL_CPUS) {
856 /* gain it for accessing the whole ring buffer. */
857 down_write(&all_cpu_access_lock);
859 /* gain it for accessing a cpu ring buffer. */
861 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 down_read(&all_cpu_access_lock);
864 /* Secondly block other access to this @cpu ring buffer. */
865 mutex_lock(&per_cpu(cpu_access_lock, cpu));
869 static inline void trace_access_unlock(int cpu)
871 if (cpu == RING_BUFFER_ALL_CPUS) {
872 up_write(&all_cpu_access_lock);
874 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 up_read(&all_cpu_access_lock);
879 static inline void trace_access_lock_init(void)
883 for_each_possible_cpu(cpu)
884 mutex_init(&per_cpu(cpu_access_lock, cpu));
889 static DEFINE_MUTEX(access_lock);
891 static inline void trace_access_lock(int cpu)
894 mutex_lock(&access_lock);
897 static inline void trace_access_unlock(int cpu)
900 mutex_unlock(&access_lock);
903 static inline void trace_access_lock_init(void)
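
/*
 * Illustrative sketch (hypothetical helper): the intended pairing of the
 * primitives above around a consuming read of one CPU's buffer.
 */
static void __maybe_unused example_read_cpu_buffer(int cpu)
{
	trace_access_lock(cpu);
	/* ... consume events from the @cpu ring buffer here ... */
	trace_access_unlock(cpu);
}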
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 unsigned int trace_ctx,
912 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 struct trace_buffer *buffer,
915 unsigned int trace_ctx,
916 int skip, struct pt_regs *regs);
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 unsigned int trace_ctx,
921 int skip, struct pt_regs *regs)
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 struct trace_buffer *buffer,
926 unsigned long trace_ctx,
927 int skip, struct pt_regs *regs)
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 int type, unsigned int trace_ctx)
937 struct trace_entry *ent = ring_buffer_event_data(event);
939 tracing_generic_entry_update(ent, type, trace_ctx);
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
946 unsigned int trace_ctx)
948 struct ring_buffer_event *event;
950 event = ring_buffer_lock_reserve(buffer, len);
952 trace_event_setup(event, type, trace_ctx);
957 void tracer_tracing_on(struct trace_array *tr)
959 if (tr->array_buffer.buffer)
960 ring_buffer_record_on(tr->array_buffer.buffer);
962 * This flag is looked at when buffers haven't been allocated
963 * yet, or by some tracers (like irqsoff), that just want to
964 * know if the ring buffer has been disabled, but it can handle
965 * races of where it gets disabled but we still do a record.
966 * As the check is in the fast path of the tracers, it is more
967 * important to be fast than accurate.
969 tr->buffer_disabled = 0;
970 /* Make the flag seen by readers */
975 * tracing_on - enable tracing buffers
977 * This function enables tracing buffers that may have been
978 * disabled with tracing_off.
980 void tracing_on(void)
982 tracer_tracing_on(&global_trace);
984 EXPORT_SYMBOL_GPL(tracing_on);
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 __this_cpu_write(trace_taskinfo_save, true);
992 /* If this is the temp buffer, we need to commit fully */
993 if (this_cpu_read(trace_buffered_event) == event) {
994 /* Length is in event->array[0] */
995 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 /* Release the temp buffer */
997 this_cpu_dec(trace_buffered_event_cnt);
999 ring_buffer_unlock_commit(buffer, event);
1003 * __trace_puts - write a constant string into the trace buffer.
1004 * @ip: The address of the caller
1005 * @str: The constant string to write
1006 * @size: The size of the string.
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1010 struct ring_buffer_event *event;
1011 struct trace_buffer *buffer;
1012 struct print_entry *entry;
1013 unsigned int trace_ctx;
1016 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 if (unlikely(tracing_selftest_running || tracing_disabled))
1022 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024 trace_ctx = tracing_gen_ctx();
1025 buffer = global_trace.array_buffer.buffer;
1026 ring_buffer_nest_start(buffer);
1027 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1034 entry = ring_buffer_event_data(event);
1037 memcpy(&entry->buf, str, size);
1039 /* Add a newline if necessary */
1040 if (entry->buf[size - 1] != '\n') {
1041 entry->buf[size] = '\n';
1042 entry->buf[size + 1] = '\0';
1044 entry->buf[size] = '\0';
1046 __buffer_unlock_commit(buffer, event);
1047 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049 ring_buffer_nest_end(buffer);
1052 EXPORT_SYMBOL_GPL(__trace_puts);
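
/*
 * Illustrative sketch: callers normally reach __trace_puts() through the
 * trace_puts() macro, which supplies _THIS_IP_ and the string length for
 * a constant string.
 */
static void __maybe_unused example_trace_puts(void)
{
	trace_puts("example: reached the slow path\n");
}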
1055 * __trace_bputs - write the pointer to a constant string into trace buffer
1056 * @ip: The address of the caller
1057 * @str: The constant string to write into the buffer
1059 int __trace_bputs(unsigned long ip, const char *str)
1061 struct ring_buffer_event *event;
1062 struct trace_buffer *buffer;
1063 struct bputs_entry *entry;
1064 unsigned int trace_ctx;
1065 int size = sizeof(struct bputs_entry);
1068 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1071 if (unlikely(tracing_selftest_running || tracing_disabled))
1074 trace_ctx = tracing_gen_ctx();
1075 buffer = global_trace.array_buffer.buffer;
1077 ring_buffer_nest_start(buffer);
1078 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1083 entry = ring_buffer_event_data(event);
1087 __buffer_unlock_commit(buffer, event);
1088 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 ring_buffer_nest_end(buffer);
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1101 struct tracer *tracer = tr->current_trace;
1102 unsigned long flags;
1105 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 internal_trace_puts("*** snapshot is being ignored ***\n");
1110 if (!tr->allocated_snapshot) {
1111 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 internal_trace_puts("*** stopping trace here! ***\n");
1117 /* Note, snapshot can not be used when the tracer uses it */
1118 if (tracer->use_max_tr) {
1119 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 local_irq_save(flags);
1125 update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 local_irq_restore(flags);
1129 void tracing_snapshot_instance(struct trace_array *tr)
1131 tracing_snapshot_instance_cond(tr, NULL);
1135 * tracing_snapshot - take a snapshot of the current buffer.
1137 * This causes a swap between the snapshot buffer and the current live
1138 * tracing buffer. You can use this to take snapshots of the live
1139 * trace when some condition is triggered, but continue to trace.
1141 * Note, make sure to allocate the snapshot with either
1142 * a tracing_snapshot_alloc(), or by doing it manually
1143 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1145 * If the snapshot buffer is not allocated, it will stop tracing.
1146 * Basically making a permanent snapshot.
1148 void tracing_snapshot(void)
1150 struct trace_array *tr = &global_trace;
1152 tracing_snapshot_instance(tr);
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
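
/*
 * Illustrative sketch (hypothetical condition): capturing the live buffer
 * when an anomalous condition is detected, while tracing continues.
 */
static void __maybe_unused example_snapshot_on_anomaly(bool anomaly)
{
	if (anomaly)
		tracing_snapshot();	/* swap the live buffer into the snapshot */
}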
1157 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158 * @tr: The tracing instance to snapshot
1159 * @cond_data: The data to be tested conditionally, and possibly saved
1161 * This is the same as tracing_snapshot() except that the snapshot is
1162 * conditional - the snapshot will only happen if the
1163 * cond_snapshot.update() implementation receiving the cond_data
1164 * returns true, which means that the trace array's cond_snapshot
1165 * update() operation used the cond_data to determine whether the
1166 * snapshot should be taken, and if it was, presumably saved it along
1167 * with the snapshot.
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 tracing_snapshot_instance_cond(tr, cond_data);
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1176 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177 * @tr: The tracing instance
1179 * When the user enables a conditional snapshot using
1180 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181 * with the snapshot. This accessor is used to retrieve it.
1183 * Should not be called from cond_snapshot.update(), since it takes
1184 * the tr->max_lock lock, which the code calling
1185 * cond_snapshot.update() has already done.
1187 * Returns the cond_data associated with the trace array's snapshot.
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 void *cond_data = NULL;
1193 arch_spin_lock(&tr->max_lock);
1195 if (tr->cond_snapshot)
1196 cond_data = tr->cond_snapshot->cond_data;
1198 arch_spin_unlock(&tr->max_lock);
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 if (!tr->allocated_snapshot) {
1214 /* allocate spare buffer */
1215 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220 tr->allocated_snapshot = true;
1226 static void free_snapshot(struct trace_array *tr)
1229 * We don't free the ring buffer; instead, we resize it because
1230 * the max_tr ring buffer has some state (e.g. ring->clock) and
1231 * we want to preserve it.
1233 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 set_buffer_entries(&tr->max_buffer, 1);
1235 tracing_reset_online_cpus(&tr->max_buffer);
1236 tr->allocated_snapshot = false;
1240 * tracing_alloc_snapshot - allocate snapshot buffer.
1242 * This only allocates the snapshot buffer if it isn't already
1243 * allocated - it doesn't also take a snapshot.
1245 * This is meant to be used in cases where the snapshot buffer needs
1246 * to be set up for events that can't sleep but need to be able to
1247 * trigger a snapshot.
1249 int tracing_alloc_snapshot(void)
1251 struct trace_array *tr = &global_trace;
1254 ret = tracing_alloc_snapshot_instance(tr);
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264 * This is similar to tracing_snapshot(), but it will allocate the
1265 * snapshot buffer if it isn't already allocated. Use this only
1266 * where it is safe to sleep, as the allocation may sleep.
1268 * This causes a swap between the snapshot buffer and the current live
1269 * tracing buffer. You can use this to take snapshots of the live
1270 * trace when some condition is triggered, but continue to trace.
1272 void tracing_snapshot_alloc(void)
1276 ret = tracing_alloc_snapshot();
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1285 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286 * @tr: The tracing instance
1287 * @cond_data: User data to associate with the snapshot
1288 * @update: Implementation of the cond_snapshot update function
1290 * Check whether the conditional snapshot for the given instance has
1291 * already been enabled, or if the current tracer is already using a
1292 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293 * save the cond_data and update function inside.
1295 * Returns 0 if successful, error otherwise.
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 cond_update_fn_t update)
1300 struct cond_snapshot *cond_snapshot;
1303 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307 cond_snapshot->cond_data = cond_data;
1308 cond_snapshot->update = update;
1310 mutex_lock(&trace_types_lock);
1312 ret = tracing_alloc_snapshot_instance(tr);
1316 if (tr->current_trace->use_max_tr) {
1322 * The cond_snapshot can only change to NULL without the
1323 * trace_types_lock. We don't care if we race with it going
1324 * to NULL, but we want to make sure that it's not set to
1325 * something other than NULL when we get here, which we can
1326 * do safely with only holding the trace_types_lock and not
1327 * having to take the max_lock.
1329 if (tr->cond_snapshot) {
1334 arch_spin_lock(&tr->max_lock);
1335 tr->cond_snapshot = cond_snapshot;
1336 arch_spin_unlock(&tr->max_lock);
1338 mutex_unlock(&trace_types_lock);
1343 mutex_unlock(&trace_types_lock);
1344 kfree(cond_snapshot);
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
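
/*
 * Illustrative sketch (hypothetical names and threshold): wiring up a
 * conditional snapshot.  The update callback receives the cond_data handed
 * to tracing_snapshot_cond() and returns true when the snapshot should
 * actually be taken.
 */
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *value = cond_data;

	/* Only snapshot when the tested value crosses a threshold. */
	return value && *value > 100;
}

static int __maybe_unused example_enable_cond_snapshot(struct trace_array *tr)
{
	/* No user data is associated with the snapshot in this sketch. */
	return tracing_snapshot_cond_enable(tr, NULL, example_cond_update);
}

/*
 * A later tracing_snapshot_cond(tr, &some_value) would then be filtered
 * through example_cond_update() before the buffers are swapped.
 */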
1350 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351 * @tr: The tracing instance
1353 * Check whether the conditional snapshot for the given instance is
1354 * enabled; if so, free the cond_snapshot associated with it,
1355 * otherwise return -EINVAL.
1357 * Returns 0 if successful, error otherwise.
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 arch_spin_lock(&tr->max_lock);
1365 if (!tr->cond_snapshot)
1368 kfree(tr->cond_snapshot);
1369 tr->cond_snapshot = NULL;
1372 arch_spin_unlock(&tr->max_lock);
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1378 void tracing_snapshot(void)
1380 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1390 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1417 void tracer_tracing_off(struct trace_array *tr)
1419 if (tr->array_buffer.buffer)
1420 ring_buffer_record_off(tr->array_buffer.buffer);
1422 * This flag is looked at when buffers haven't been allocated
1423 * yet, or by some tracers (like irqsoff), that just want to
1424 * know if the ring buffer has been disabled, but it can handle
1425 * races of where it gets disabled but we still do a record.
1426 * As the check is in the fast path of the tracers, it is more
1427 * important to be fast than accurate.
1429 tr->buffer_disabled = 1;
1430 /* Make the flag seen by readers */
1435 * tracing_off - turn off tracing buffers
1437 * This function stops the tracing buffers from recording data.
1438 * It does not disable any overhead the tracers themselves may
1439 * be causing. This function simply causes all recording to
1440 * the ring buffers to fail.
1442 void tracing_off(void)
1444 tracer_tracing_off(&global_trace);
1446 EXPORT_SYMBOL_GPL(tracing_off);
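
/*
 * Illustrative sketch (hypothetical condition): freezing the ring buffers
 * as soon as a problem is detected, so the events leading up to it are
 * preserved for a later read of the trace file.
 */
static void __maybe_unused example_freeze_trace(bool problem_detected)
{
	if (problem_detected)
		tracing_off();
}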
1448 void disable_trace_on_warning(void)
1450 if (__disable_trace_on_warning) {
1451 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 "Disabling tracing due to warning\n");
1458 * tracer_tracing_is_on - show real state of ring buffer enabled
1459 * @tr : the trace array to know if ring buffer is enabled
1461 * Shows real state of the ring buffer if it is enabled or not.
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1465 if (tr->array_buffer.buffer)
1466 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 return !tr->buffer_disabled;
1471 * tracing_is_on - show state of ring buffers enabled
1473 int tracing_is_on(void)
1475 return tracer_tracing_is_on(&global_trace);
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1479 static int __init set_buf_size(char *str)
1481 unsigned long buf_size;
1485 buf_size = memparse(str, &str);
1486 /* nr_entries can not be zero */
1489 trace_buf_size = buf_size;
1492 __setup("trace_buf_size=", set_buf_size);
1494 static int __init set_tracing_thresh(char *str)
1496 unsigned long threshold;
1501 ret = kstrtoul(str, 0, &threshold);
1504 tracing_thresh = threshold * 1000;
1507 __setup("tracing_thresh=", set_tracing_thresh);
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1511 return nsecs / 1000;
1515 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518 * of strings in the order that the evals (enum) were defined.
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1532 int in_ns; /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 { trace_clock_local, "local", 1 },
1535 { trace_clock_global, "global", 1 },
1536 { trace_clock_counter, "counter", 0 },
1537 { trace_clock_jiffies, "uptime", 0 },
1538 { trace_clock, "perf", 1 },
1539 { ktime_get_mono_fast_ns, "mono", 1 },
1540 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1541 { ktime_get_boot_fast_ns, "boot", 1 },
1545 bool trace_clock_in_ns(struct trace_array *tr)
1547 if (trace_clocks[tr->clock_id].in_ns)
1554 * trace_parser_get_init - gets the buffer for trace parser
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1558 memset(parser, 0, sizeof(*parser));
1560 parser->buffer = kmalloc(size, GFP_KERNEL);
1561 if (!parser->buffer)
1564 parser->size = size;
1569 * trace_parser_put - frees the buffer for trace parser
1571 void trace_parser_put(struct trace_parser *parser)
1573 kfree(parser->buffer);
1574 parser->buffer = NULL;
1578 * trace_get_user - reads the user input string separated by space
1579 * (matched by isspace(ch))
1581 * For each string found, the 'struct trace_parser' is updated,
1582 * and the function returns.
1584 * Returns number of bytes read.
1586 * See kernel/trace/trace.h for 'struct trace_parser' details.
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 size_t cnt, loff_t *ppos)
1596 trace_parser_clear(parser);
1598 ret = get_user(ch, ubuf++);
1606 * If the parser is not finished with the last write,
1607 * continue reading the user input without skipping spaces.
1609 if (!parser->cont) {
1610 /* skip white space */
1611 while (cnt && isspace(ch)) {
1612 ret = get_user(ch, ubuf++);
1621 /* only spaces were written */
1622 if (isspace(ch) || !ch) {
1629 /* read the non-space input */
1630 while (cnt && !isspace(ch) && ch) {
1631 if (parser->idx < parser->size - 1)
1632 parser->buffer[parser->idx++] = ch;
1637 ret = get_user(ch, ubuf++);
1644 /* We either got finished input or we have to wait for another call. */
1645 if (isspace(ch) || !ch) {
1646 parser->buffer[parser->idx] = 0;
1647 parser->cont = false;
1648 } else if (parser->idx < parser->size - 1) {
1649 parser->cont = true;
1650 parser->buffer[parser->idx++] = ch;
1651 /* Make sure the parsed string always terminates with '\0'. */
1652 parser->buffer[parser->idx] = 0;
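
/*
 * Illustrative sketch (error handling trimmed): the usual lifecycle a
 * write() handler follows with the parser helpers above.
 */
static ssize_t __maybe_unused example_parse_one_token(const char __user *ubuf,
						      size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	int ret;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret > 0 && trace_parser_loaded(&parser))
		pr_info("parsed token: %s\n", parser.buffer);

	trace_parser_put(&parser);
	return ret;
}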
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 if (trace_seq_used(s) <= s->seq.readpos)
1673 len = trace_seq_used(s) - s->seq.readpos;
1676 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1678 s->seq.readpos += cnt;
1682 unsigned long __read_mostly tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 defined(CONFIG_FSNOTIFY)
1688 static struct workqueue_struct *fsnotify_wq;
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1692 struct trace_array *tr = container_of(work, struct trace_array,
1694 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1699 struct trace_array *tr = container_of(iwork, struct trace_array,
1701 queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 struct dentry *d_tracer)
1707 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 d_tracer, &tr->max_latency,
1711 &tracing_max_lat_fops);
1714 __init static int latency_fsnotify_init(void)
1716 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 WQ_UNBOUND | WQ_HIGHPRI, 0);
1719 pr_err("Unable to allocate tr_max_lat_wq\n");
1725 late_initcall_sync(latency_fsnotify_init);
1727 void latency_fsnotify(struct trace_array *tr)
1732 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 * possible that we are called from __schedule() or do_idle(), which
1734 * could cause a deadlock.
1736 irq_work_queue(&tr->fsnotify_irqwork);
1740 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741 * defined(CONFIG_FSNOTIFY)
1745 #define trace_create_maxlat_file(tr, d_tracer) \
1746 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1747 &tr->max_latency, &tracing_max_lat_fops)
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1753 * Copy the new maximum trace into the separate maximum-trace
1754 * structure. (this way the maximum trace is permanently saved,
1755 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1760 struct array_buffer *trace_buf = &tr->array_buffer;
1761 struct array_buffer *max_buf = &tr->max_buffer;
1762 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766 max_buf->time_start = data->preempt_timestamp;
1768 max_data->saved_latency = tr->max_latency;
1769 max_data->critical_start = data->critical_start;
1770 max_data->critical_end = data->critical_end;
1772 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 max_data->pid = tsk->pid;
1775 * If tsk == current, then use current_uid(), as that does not use
1776 * RCU. The irq tracer can be called out of RCU scope.
1779 max_data->uid = current_uid();
1781 max_data->uid = task_uid(tsk);
1783 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 max_data->policy = tsk->policy;
1785 max_data->rt_priority = tsk->rt_priority;
1787 /* record this task's comm */
1788 tracing_record_cmdline(tsk);
1789 latency_fsnotify(tr);
1793 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1795 * @tsk: the task with the latency
1796 * @cpu: The cpu that initiated the trace.
1797 * @cond_data: User data associated with a conditional snapshot
1799 * Flip the buffers between the @tr and the max_tr and record information
1800 * about which task was the cause of this latency.
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809 WARN_ON_ONCE(!irqs_disabled());
1811 if (!tr->allocated_snapshot) {
1812 /* Only the nop tracer should hit this when disabling */
1813 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1817 arch_spin_lock(&tr->max_lock);
1819 /* Inherit the recordable setting from array_buffer */
1820 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 ring_buffer_record_on(tr->max_buffer.buffer);
1823 ring_buffer_record_off(tr->max_buffer.buffer);
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1831 __update_max_tr(tr, tsk, cpu);
1834 arch_spin_unlock(&tr->max_lock);
1838 * update_max_tr_single - only copy one trace over, and reset the rest
1840 * @tsk: task with the latency
1841 * @cpu: the cpu of the buffer to copy.
1843 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1853 WARN_ON_ONCE(!irqs_disabled());
1854 if (!tr->allocated_snapshot) {
1855 /* Only the nop tracer should hit this when disabling */
1856 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1860 arch_spin_lock(&tr->max_lock);
1862 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1864 if (ret == -EBUSY) {
1866 * We failed to swap the buffer due to a commit taking
1867 * place on this CPU. We fail to record, but we reset
1868 * the max trace buffer (no one writes directly to it)
1869 * and flag that it failed.
1871 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 "Failed to swap buffers due to commit in progress\n");
1875 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1877 __update_max_tr(tr, tsk, cpu);
1878 arch_spin_unlock(&tr->max_lock);
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1884 /* Iterators are static, they should be filled or empty */
1885 if (trace_buffer_iter(iter, iter->cpu_file))
1888 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1895 struct trace_selftests {
1896 struct list_head list;
1897 struct tracer *type;
1900 static LIST_HEAD(postponed_selftests);
1902 static int save_selftest(struct tracer *type)
1904 struct trace_selftests *selftest;
1906 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1910 selftest->type = type;
1911 list_add(&selftest->list, &postponed_selftests);
1915 static int run_tracer_selftest(struct tracer *type)
1917 struct trace_array *tr = &global_trace;
1918 struct tracer *saved_tracer = tr->current_trace;
1921 if (!type->selftest || tracing_selftest_disabled)
1925 * If a tracer registers early in boot up (before scheduling is
1926 * initialized and such), then do not run its selftests yet.
1927 * Instead, run it a little later in the boot process.
1929 if (!selftests_can_run)
1930 return save_selftest(type);
1932 if (!tracing_is_on()) {
1933 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939 * Run a selftest on this tracer.
1940 * Here we reset the trace buffer, and set the current
1941 * tracer to be this tracer. The tracer can then run some
1942 * internal tracing to verify that everything is in order.
1943 * If we fail, we do not register this tracer.
1945 tracing_reset_online_cpus(&tr->array_buffer);
1947 tr->current_trace = type;
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950 if (type->use_max_tr) {
1951 /* If we expanded the buffers, make sure the max is expanded too */
1952 if (ring_buffer_expanded)
1953 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954 RING_BUFFER_ALL_CPUS);
1955 tr->allocated_snapshot = true;
1959 /* the test is responsible for initializing and enabling */
1960 pr_info("Testing tracer %s: ", type->name);
1961 ret = type->selftest(type, tr);
1962 /* the test is responsible for resetting too */
1963 tr->current_trace = saved_tracer;
1965 printk(KERN_CONT "FAILED!\n");
1966 /* Add the warning after printing 'FAILED' */
1970 /* Only reset on passing, to avoid touching corrupted buffers */
1971 tracing_reset_online_cpus(&tr->array_buffer);
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 if (type->use_max_tr) {
1975 tr->allocated_snapshot = false;
1977 /* Shrink the max buffer again */
1978 if (ring_buffer_expanded)
1979 ring_buffer_resize(tr->max_buffer.buffer, 1,
1980 RING_BUFFER_ALL_CPUS);
1984 printk(KERN_CONT "PASSED\n");
1988 static __init int init_trace_selftests(void)
1990 struct trace_selftests *p, *n;
1991 struct tracer *t, **last;
1994 selftests_can_run = true;
1996 mutex_lock(&trace_types_lock);
1998 if (list_empty(&postponed_selftests))
2001 pr_info("Running postponed tracer tests:\n");
2003 tracing_selftest_running = true;
2004 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005 /* This loop can take minutes when sanitizers are enabled, so
2006 * let's make sure we allow RCU processing.
2009 ret = run_tracer_selftest(p->type);
2010 /* If the test fails, then warn and remove from available_tracers */
2012 WARN(1, "tracer: %s failed selftest, disabling\n",
2014 last = &trace_types;
2015 for (t = trace_types; t; t = t->next) {
2026 tracing_selftest_running = false;
2029 mutex_unlock(&trace_types_lock);
2033 core_initcall(init_trace_selftests);
2035 static inline int run_tracer_selftest(struct tracer *type)
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2043 static void __init apply_trace_boot_options(void);
2046 * register_tracer - register a tracer with the ftrace system.
2047 * @type: the plugin for the tracer
2049 * Register a new plugin tracer.
2051 int __init register_tracer(struct tracer *type)
2057 pr_info("Tracer must have a name\n");
2061 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2066 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067 pr_warn("Can not register tracer %s due to lockdown\n",
2072 mutex_lock(&trace_types_lock);
2074 tracing_selftest_running = true;
2076 for (t = trace_types; t; t = t->next) {
2077 if (strcmp(type->name, t->name) == 0) {
2079 pr_info("Tracer %s already registered\n",
2086 if (!type->set_flag)
2087 type->set_flag = &dummy_set_flag;
2089 /* Allocate a dummy tracer_flags */
2090 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2095 type->flags->val = 0;
2096 type->flags->opts = dummy_tracer_opt;
2098 if (!type->flags->opts)
2099 type->flags->opts = dummy_tracer_opt;
2101 /* store the tracer for __set_tracer_option */
2102 type->flags->trace = type;
2104 ret = run_tracer_selftest(type);
2108 type->next = trace_types;
2110 add_tracer_options(&global_trace, type);
2113 tracing_selftest_running = false;
2114 mutex_unlock(&trace_types_lock);
2116 if (ret || !default_bootup_tracer)
2119 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2122 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123 /* Do we want this tracer to start on bootup? */
2124 tracing_set_tracer(&global_trace, type->name);
2125 default_bootup_tracer = NULL;
2127 apply_trace_boot_options();
2129 /* disable other selftests, since this will break it. */
2130 disable_tracing_selftest("running a tracer");
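
/*
 * Illustrative sketch (hypothetical tracer): the minimum a plugin tracer
 * provides before handing itself to register_tracer() from an __init hook.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}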
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2138 struct trace_buffer *buffer = buf->buffer;
2143 ring_buffer_record_disable(buffer);
2145 /* Make sure all commits have finished */
2147 ring_buffer_reset_cpu(buffer, cpu);
2149 ring_buffer_record_enable(buffer);
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2154 struct trace_buffer *buffer = buf->buffer;
2159 ring_buffer_record_disable(buffer);
2161 /* Make sure all commits have finished */
2164 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2166 ring_buffer_reset_online_cpus(buffer);
2168 ring_buffer_record_enable(buffer);
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2174 struct trace_array *tr;
2176 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177 if (!tr->clear_trace)
2179 tr->clear_trace = false;
2180 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182 tracing_reset_online_cpus(&tr->max_buffer);
2188 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2189 * is the tgid last observed corresponding to pid=i.
2191 static int *tgid_map;
2193 /* The maximum valid index into tgid_map. */
2194 static size_t tgid_map_max;
2196 #define SAVED_CMDLINES_DEFAULT 128
2197 #define NO_CMDLINE_MAP UINT_MAX
2198 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2199 struct saved_cmdlines_buffer {
2200 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2201 unsigned *map_cmdline_to_pid;
2202 unsigned cmdline_num;
2204 char *saved_cmdlines;
2206 static struct saved_cmdlines_buffer *savedcmd;
2208 static inline char *get_saved_cmdlines(int idx)
2210 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2213 static inline void set_cmdline(int idx, const char *cmdline)
2215 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2218 static int allocate_cmdlines_buffer(unsigned int val,
2219 struct saved_cmdlines_buffer *s)
2221 s->map_cmdline_to_pid = kmalloc_array(val,
2222 sizeof(*s->map_cmdline_to_pid),
2224 if (!s->map_cmdline_to_pid)
2227 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2228 if (!s->saved_cmdlines) {
2229 kfree(s->map_cmdline_to_pid);
2234 s->cmdline_num = val;
2235 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2236 sizeof(s->map_pid_to_cmdline));
2237 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2238 val * sizeof(*s->map_cmdline_to_pid));
2243 static int trace_create_savedcmd(void)
2247 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2251 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2261 int is_tracing_stopped(void)
2263 return global_trace.stop_count;
2267 * tracing_start - quick start of the tracer
2269 * If tracing is enabled but was stopped by tracing_stop,
2270 * this will start the tracer back up.
2272 void tracing_start(void)
2274 struct trace_buffer *buffer;
2275 unsigned long flags;
2277 if (tracing_disabled)
2280 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2281 if (--global_trace.stop_count) {
2282 if (global_trace.stop_count < 0) {
2283 /* Someone screwed up their debugging */
2285 global_trace.stop_count = 0;
2290 /* Prevent the buffers from switching */
2291 arch_spin_lock(&global_trace.max_lock);
2293 buffer = global_trace.array_buffer.buffer;
2295 ring_buffer_record_enable(buffer);
2297 #ifdef CONFIG_TRACER_MAX_TRACE
2298 buffer = global_trace.max_buffer.buffer;
2300 ring_buffer_record_enable(buffer);
2303 arch_spin_unlock(&global_trace.max_lock);
2306 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2309 static void tracing_start_tr(struct trace_array *tr)
2311 struct trace_buffer *buffer;
2312 unsigned long flags;
2314 if (tracing_disabled)
2317 /* If global, we need to also start the max tracer */
2318 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2319 return tracing_start();
2321 raw_spin_lock_irqsave(&tr->start_lock, flags);
2323 if (--tr->stop_count) {
2324 if (tr->stop_count < 0) {
2325 /* Someone screwed up their debugging */
2332 buffer = tr->array_buffer.buffer;
2334 ring_buffer_record_enable(buffer);
2337 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2341 * tracing_stop - quick stop of the tracer
2343 * Light weight way to stop tracing. Use in conjunction with
2346 void tracing_stop(void)
2348 struct trace_buffer *buffer;
2349 unsigned long flags;
2351 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2352 if (global_trace.stop_count++)
2355 /* Prevent the buffers from switching */
2356 arch_spin_lock(&global_trace.max_lock);
2358 buffer = global_trace.array_buffer.buffer;
2360 ring_buffer_record_disable(buffer);
2362 #ifdef CONFIG_TRACER_MAX_TRACE
2363 buffer = global_trace.max_buffer.buffer;
2365 ring_buffer_record_disable(buffer);
2368 arch_spin_unlock(&global_trace.max_lock);
2371 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2374 static void tracing_stop_tr(struct trace_array *tr)
2376 struct trace_buffer *buffer;
2377 unsigned long flags;
2379 /* If global, we need to also stop the max tracer */
2380 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2381 return tracing_stop();
2383 raw_spin_lock_irqsave(&tr->start_lock, flags);
2384 if (tr->stop_count++)
2387 buffer = tr->array_buffer.buffer;
2389 ring_buffer_record_disable(buffer);
2392 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
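/*
 * Usage sketch (illustrative only): tracing_stop()/tracing_start() nest
 * via stop_count, so callers may stack them:
 *
 *	tracing_stop();
 *	... inspect or copy the ring buffers ...
 *	tracing_start();
 *
 * Recording is only re-enabled when the outermost tracing_start() brings
 * stop_count back to zero.
 */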
2395 static int trace_save_cmdline(struct task_struct *tsk)
2399 /* treat recording of idle task as a success */
2403 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2406 * It's not the end of the world if we don't get
2407 * the lock, but we also don't want to spin
2408 * nor do we want to disable interrupts,
2409 * so if we miss here, then better luck next time.
2411 if (!arch_spin_trylock(&trace_cmdline_lock))
2414 idx = savedcmd->map_pid_to_cmdline[tpid];
2415 if (idx == NO_CMDLINE_MAP) {
2416 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2418 savedcmd->map_pid_to_cmdline[tpid] = idx;
2419 savedcmd->cmdline_idx = idx;
2422 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2423 set_cmdline(idx, tsk->comm);
2425 arch_spin_unlock(&trace_cmdline_lock);
2430 static void __trace_find_cmdline(int pid, char comm[])
2436 strcpy(comm, "<idle>");
2440 if (WARN_ON_ONCE(pid < 0)) {
2441 strcpy(comm, "<XXX>");
2445 tpid = pid & (PID_MAX_DEFAULT - 1);
2446 map = savedcmd->map_pid_to_cmdline[tpid];
2447 if (map != NO_CMDLINE_MAP) {
2448 tpid = savedcmd->map_cmdline_to_pid[map];
2450 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2454 strcpy(comm, "<...>");
2457 void trace_find_cmdline(int pid, char comm[])
2460 arch_spin_lock(&trace_cmdline_lock);
2462 __trace_find_cmdline(pid, comm);
2464 arch_spin_unlock(&trace_cmdline_lock);
2468 static int *trace_find_tgid_ptr(int pid)
2471 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2472 * if we observe a non-NULL tgid_map then we also observe the correct
2475 int *map = smp_load_acquire(&tgid_map);
2477 if (unlikely(!map || pid > tgid_map_max))
2483 int trace_find_tgid(int pid)
2485 int *ptr = trace_find_tgid_ptr(pid);
2487 return ptr ? *ptr : 0;
2490 static int trace_save_tgid(struct task_struct *tsk)
2494 /* treat recording of idle task as a success */
2498 ptr = trace_find_tgid_ptr(tsk->pid);
2506 static bool tracing_record_taskinfo_skip(int flags)
2508 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2510 if (!__this_cpu_read(trace_taskinfo_save))
2516 * tracing_record_taskinfo - record the task info of a task
2518 * @task: task to record
2519 * @flags: TRACE_RECORD_CMDLINE for recording comm
2520 * TRACE_RECORD_TGID for recording tgid
2522 void tracing_record_taskinfo(struct task_struct *task, int flags)
2526 if (tracing_record_taskinfo_skip(flags))
2530 * Record as much task information as possible. If some fail, continue
2531 * to try to record the others.
2533 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2534 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2536 /* If recording any information failed, retry again soon. */
2540 __this_cpu_write(trace_taskinfo_save, false);
2544 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2546 * @prev: previous task during sched_switch
2547 * @next: next task during sched_switch
2548 * @flags: TRACE_RECORD_CMDLINE for recording comm
2549 * TRACE_RECORD_TGID for recording tgid
2551 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2552 struct task_struct *next, int flags)
2556 if (tracing_record_taskinfo_skip(flags))
2560 * Record as much task information as possible. If some fail, continue
2561 * to try to record the others.
2563 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2564 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2565 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2566 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2568 /* If recording any information failed, retry again soon. */
2572 __this_cpu_write(trace_taskinfo_save, false);
2575 /* Helpers to record a specific task information */
2576 void tracing_record_cmdline(struct task_struct *task)
2578 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2581 void tracing_record_tgid(struct task_struct *task)
2583 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2587 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2588 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2589 * simplifies those functions and keeps them in sync.
2591 enum print_line_t trace_handle_return(struct trace_seq *s)
2593 return trace_seq_has_overflowed(s) ?
2594 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2596 EXPORT_SYMBOL_GPL(trace_handle_return);
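/*
 * Typical usage in an event's output callback (sketch; the field name is
 * illustrative):
 *
 *	trace_seq_printf(s, "irq=%d\n", field->irq);
 *	return trace_handle_return(s);
 *
 * The pattern is simply "write to the trace_seq, then report overflow via
 * this helper".
 */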
2598 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2600 unsigned int trace_flags = irqs_status;
2603 pc = preempt_count();
2606 trace_flags |= TRACE_FLAG_NMI;
2607 if (pc & HARDIRQ_MASK)
2608 trace_flags |= TRACE_FLAG_HARDIRQ;
2609 if (in_serving_softirq())
2610 trace_flags |= TRACE_FLAG_SOFTIRQ;
2612 if (tif_need_resched())
2613 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2614 if (test_preempt_need_resched())
2615 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2616 return (trace_flags << 16) | (pc & 0xff);
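/*
 * Sketch of how the packed context word above is consumed: the preempt
 * count lives in the low byte and the TRACE_FLAG_* bits in the upper
 * 16 bits, so a reader can recover them with:
 *
 *	pc    = trace_ctx & 0xff;
 *	flags = trace_ctx >> 16;
 */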
2619 struct ring_buffer_event *
2620 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2623 unsigned int trace_ctx)
2625 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2628 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2629 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2630 static int trace_buffered_event_ref;
2633 * trace_buffered_event_enable - enable buffering events
2635 * When events are being filtered, it is quicker to use a temporary
2636 * buffer to write the event data into if there's a likely chance
2637 * that it will not be committed. The discard of the ring buffer
2638 * is not as fast as committing, and is much slower than copying into a temp buffer.
2641 * When an event is to be filtered, allocate per cpu buffers to
2642 * write the event data into, and if the event is filtered and discarded
2643 * it is simply dropped; otherwise, the entire data is committed in one shot.
2646 void trace_buffered_event_enable(void)
2648 struct ring_buffer_event *event;
2652 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2654 if (trace_buffered_event_ref++)
2657 for_each_tracing_cpu(cpu) {
2658 page = alloc_pages_node(cpu_to_node(cpu),
2659 GFP_KERNEL | __GFP_NORETRY, 0);
2663 event = page_address(page);
2664 memset(event, 0, sizeof(*event));
2666 per_cpu(trace_buffered_event, cpu) = event;
2669 if (cpu == smp_processor_id() &&
2670 __this_cpu_read(trace_buffered_event) !=
2671 per_cpu(trace_buffered_event, cpu))
2678 trace_buffered_event_disable();
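/*
 * Usage sketch (illustrative only): both calls are made with event_mutex
 * held, typically far apart in time:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(filter installed)
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();		(filter removed)
 *	mutex_unlock(&event_mutex);
 *
 * The reference count above lets several filtered events share the same
 * per-CPU scratch pages.
 */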
2681 static void enable_trace_buffered_event(void *data)
2683 /* Probably not needed, but do it anyway */
2685 this_cpu_dec(trace_buffered_event_cnt);
2688 static void disable_trace_buffered_event(void *data)
2690 this_cpu_inc(trace_buffered_event_cnt);
2694 * trace_buffered_event_disable - disable buffering events
2696 * When a filter is removed, it is faster to not use the buffered
2697 * events, and to commit directly into the ring buffer. Free up
2698 * the temp buffers when there are no more users. This requires
2699 * special synchronization with current events.
2701 void trace_buffered_event_disable(void)
2705 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2710 if (--trace_buffered_event_ref)
2714 /* For each CPU, set the buffer as used. */
2715 smp_call_function_many(tracing_buffer_mask,
2716 disable_trace_buffered_event, NULL, 1);
2719 /* Wait for all current users to finish */
2722 for_each_tracing_cpu(cpu) {
2723 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2724 per_cpu(trace_buffered_event, cpu) = NULL;
2727 * Make sure trace_buffered_event is NULL before clearing
2728 * trace_buffered_event_cnt.
2733 /* Do the work on each cpu */
2734 smp_call_function_many(tracing_buffer_mask,
2735 enable_trace_buffered_event, NULL, 1);
2739 static struct trace_buffer *temp_buffer;
2741 struct ring_buffer_event *
2742 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2743 struct trace_event_file *trace_file,
2744 int type, unsigned long len,
2745 unsigned int trace_ctx)
2747 struct ring_buffer_event *entry;
2748 struct trace_array *tr = trace_file->tr;
2751 *current_rb = tr->array_buffer.buffer;
2753 if (!tr->no_filter_buffering_ref &&
2754 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2755 (entry = this_cpu_read(trace_buffered_event))) {
2756 /* Try to use the per cpu buffer first */
2757 val = this_cpu_inc_return(trace_buffered_event_cnt);
2758 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2759 trace_event_setup(entry, type, trace_ctx);
2760 entry->array[0] = len;
2763 this_cpu_dec(trace_buffered_event_cnt);
2766 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2769 * If tracing is off, but we have triggers enabled,
2770 * we still need to look at the event data. Use the temp_buffer
2771 * to store the trace event for the trigger to use. It's recursion
2772 * safe and will not be recorded anywhere.
2774 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2775 *current_rb = temp_buffer;
2776 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2781 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2783 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2784 static DEFINE_MUTEX(tracepoint_printk_mutex);
2786 static void output_printk(struct trace_event_buffer *fbuffer)
2788 struct trace_event_call *event_call;
2789 struct trace_event_file *file;
2790 struct trace_event *event;
2791 unsigned long flags;
2792 struct trace_iterator *iter = tracepoint_print_iter;
2794 /* We should never get here if iter is NULL */
2795 if (WARN_ON_ONCE(!iter))
2798 event_call = fbuffer->trace_file->event_call;
2799 if (!event_call || !event_call->event.funcs ||
2800 !event_call->event.funcs->trace)
2803 file = fbuffer->trace_file;
2804 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2805 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2806 !filter_match_preds(file->filter, fbuffer->entry)))
2809 event = &fbuffer->trace_file->event_call->event;
2811 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2812 trace_seq_init(&iter->seq);
2813 iter->ent = fbuffer->entry;
2814 event_call->event.funcs->trace(iter, 0, event);
2815 trace_seq_putc(&iter->seq, 0);
2816 printk("%s", iter->seq.buffer);
2818 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2821 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2822 void *buffer, size_t *lenp,
2825 int save_tracepoint_printk;
2828 mutex_lock(&tracepoint_printk_mutex);
2829 save_tracepoint_printk = tracepoint_printk;
2831 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2834 * This will force exiting early, as tracepoint_printk
2835 * is always zero when tracepoint_print_iter is not allocated.
2837 if (!tracepoint_print_iter)
2838 tracepoint_printk = 0;
2840 if (save_tracepoint_printk == tracepoint_printk)
2843 if (tracepoint_printk)
2844 static_key_enable(&tracepoint_printk_key.key);
2846 static_key_disable(&tracepoint_printk_key.key);
2849 mutex_unlock(&tracepoint_printk_mutex);
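/*
 * Example (sketch): this handler backs the kernel.tracepoint_printk
 * sysctl, so the redirection can be toggled at run time, e.g.:
 *
 *	sysctl kernel.tracepoint_printk=1
 *
 * It only has an effect when the "tp_printk" boot option allocated
 * tracepoint_print_iter; otherwise the value is forced back to zero above.
 */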
2854 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2856 enum event_trigger_type tt = ETT_NONE;
2857 struct trace_event_file *file = fbuffer->trace_file;
2859 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2860 fbuffer->entry, &tt))
2863 if (static_key_false(&tracepoint_printk_key.key))
2864 output_printk(fbuffer);
2866 if (static_branch_unlikely(&trace_event_exports_enabled))
2867 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2869 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2870 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2874 event_triggers_post_call(file, tt);
2877 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2882 * trace_buffer_unlock_commit_regs()
2883 * trace_event_buffer_commit()
2884 * trace_event_raw_event_xxx()
2886 # define STACK_SKIP 3
2888 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2889 struct trace_buffer *buffer,
2890 struct ring_buffer_event *event,
2891 unsigned int trace_ctx,
2892 struct pt_regs *regs)
2894 __buffer_unlock_commit(buffer, event);
2897 * If regs is not set, then skip the stack frames of the commit helpers (STACK_SKIP).
2898 * Note, we can still get here via blktrace, wakeup tracer
2899 * and mmiotrace, but that's ok if they lose a function or
2900 * two. They are not that meaningful.
2902 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2903 ftrace_trace_userstack(tr, buffer, trace_ctx);
2907 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2910 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2911 struct ring_buffer_event *event)
2913 __buffer_unlock_commit(buffer, event);
2917 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2918 parent_ip, unsigned int trace_ctx)
2920 struct trace_event_call *call = &event_function;
2921 struct trace_buffer *buffer = tr->array_buffer.buffer;
2922 struct ring_buffer_event *event;
2923 struct ftrace_entry *entry;
2925 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2929 entry = ring_buffer_event_data(event);
2931 entry->parent_ip = parent_ip;
2933 if (!call_filter_check_discard(call, entry, buffer, event)) {
2934 if (static_branch_unlikely(&trace_function_exports_enabled))
2935 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2936 __buffer_unlock_commit(buffer, event);
2940 #ifdef CONFIG_STACKTRACE
2942 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2943 #define FTRACE_KSTACK_NESTING 4
2945 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2947 struct ftrace_stack {
2948 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2952 struct ftrace_stacks {
2953 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2956 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2957 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2959 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2960 unsigned int trace_ctx,
2961 int skip, struct pt_regs *regs)
2963 struct trace_event_call *call = &event_kernel_stack;
2964 struct ring_buffer_event *event;
2965 unsigned int size, nr_entries;
2966 struct ftrace_stack *fstack;
2967 struct stack_entry *entry;
2971 * Add one for this function and the call to stack_trace_save().
2972 * If regs is set, then these functions will not be in the way.
2974 #ifndef CONFIG_UNWINDER_ORC
2979 preempt_disable_notrace();
2981 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2983 /* This should never happen. If it does, yell once and skip */
2984 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2988 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2989 * interrupt will either see the value pre increment or post
2990 * increment. If the interrupt happens pre increment it will have
2991 * restored the counter when it returns. We just need a barrier to
2992 * keep gcc from moving things around.
2996 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2997 size = ARRAY_SIZE(fstack->calls);
3000 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3003 nr_entries = stack_trace_save(fstack->calls, size, skip);
3006 size = nr_entries * sizeof(unsigned long);
3007 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3008 (sizeof(*entry) - sizeof(entry->caller)) + size,
3012 entry = ring_buffer_event_data(event);
3014 memcpy(&entry->caller, fstack->calls, size);
3015 entry->size = nr_entries;
3017 if (!call_filter_check_discard(call, entry, buffer, event))
3018 __buffer_unlock_commit(buffer, event);
3021 /* Again, don't let gcc optimize things here */
3023 __this_cpu_dec(ftrace_stack_reserve);
3024 preempt_enable_notrace();
3028 static inline void ftrace_trace_stack(struct trace_array *tr,
3029 struct trace_buffer *buffer,
3030 unsigned int trace_ctx,
3031 int skip, struct pt_regs *regs)
3033 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3036 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3039 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3042 struct trace_buffer *buffer = tr->array_buffer.buffer;
3044 if (rcu_is_watching()) {
3045 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3050 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3051 * but if the above rcu_is_watching() failed, then the NMI
3052 * triggered someplace critical, and rcu_irq_enter() should
3053 * not be called from NMI.
3055 if (unlikely(in_nmi()))
3058 rcu_irq_enter_irqson();
3059 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3060 rcu_irq_exit_irqson();
3064 * trace_dump_stack - record a stack back trace in the trace buffer
3065 * @skip: Number of functions to skip (helper handlers)
3067 void trace_dump_stack(int skip)
3069 if (tracing_disabled || tracing_selftest_running)
3072 #ifndef CONFIG_UNWINDER_ORC
3073 /* Skip 1 to skip this function. */
3076 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3077 tracing_gen_ctx(), skip, NULL);
3079 EXPORT_SYMBOL_GPL(trace_dump_stack);
3081 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3082 static DEFINE_PER_CPU(int, user_stack_count);
3085 ftrace_trace_userstack(struct trace_array *tr,
3086 struct trace_buffer *buffer, unsigned int trace_ctx)
3088 struct trace_event_call *call = &event_user_stack;
3089 struct ring_buffer_event *event;
3090 struct userstack_entry *entry;
3092 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3096 * NMIs cannot handle page faults, even with fixups.
3097 * Saving the user stack can (and often does) fault.
3099 if (unlikely(in_nmi()))
3103 * prevent recursion, since the user stack tracing may
3104 * trigger other kernel events.
3107 if (__this_cpu_read(user_stack_count))
3110 __this_cpu_inc(user_stack_count);
3112 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3113 sizeof(*entry), trace_ctx);
3115 goto out_drop_count;
3116 entry = ring_buffer_event_data(event);
3118 entry->tgid = current->tgid;
3119 memset(&entry->caller, 0, sizeof(entry->caller));
3121 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3122 if (!call_filter_check_discard(call, entry, buffer, event))
3123 __buffer_unlock_commit(buffer, event);
3126 __this_cpu_dec(user_stack_count);
3130 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3131 static void ftrace_trace_userstack(struct trace_array *tr,
3132 struct trace_buffer *buffer,
3133 unsigned int trace_ctx)
3136 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3138 #endif /* CONFIG_STACKTRACE */
3141 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3142 unsigned long long delta)
3144 entry->bottom_delta_ts = delta & U32_MAX;
3145 entry->top_delta_ts = (delta >> 32);
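/*
 * The output side reassembles the 64-bit delta from the two halves stored
 * above (sketch):
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */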
3148 void trace_last_func_repeats(struct trace_array *tr,
3149 struct trace_func_repeats *last_info,
3150 unsigned int trace_ctx)
3152 struct trace_buffer *buffer = tr->array_buffer.buffer;
3153 struct func_repeats_entry *entry;
3154 struct ring_buffer_event *event;
3157 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3158 sizeof(*entry), trace_ctx);
3162 delta = ring_buffer_event_time_stamp(buffer, event) -
3163 last_info->ts_last_call;
3165 entry = ring_buffer_event_data(event);
3166 entry->ip = last_info->ip;
3167 entry->parent_ip = last_info->parent_ip;
3168 entry->count = last_info->count;
3169 func_repeats_set_delta_ts(entry, delta);
3171 __buffer_unlock_commit(buffer, event);
3174 /* created for use with alloc_percpu */
3175 struct trace_buffer_struct {
3177 char buffer[4][TRACE_BUF_SIZE];
3180 static struct trace_buffer_struct *trace_percpu_buffer;
3183 * This allows for lockless recording. If we're nested too deeply, then
3184 * this returns NULL.
3186 static char *get_trace_buf(void)
3188 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3190 if (!buffer || buffer->nesting >= 4)
3195 /* Interrupts must see nesting incremented before we use the buffer */
3197 return &buffer->buffer[buffer->nesting - 1][0];
3200 static void put_trace_buf(void)
3202 /* Don't let the decrement of nesting leak before this */
3204 this_cpu_dec(trace_percpu_buffer->nesting);
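/*
 * Pairing sketch (illustrative only): callers disable preemption, grab a
 * nesting level, format into it, then drop it:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */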
3207 static int alloc_percpu_trace_buffer(void)
3209 struct trace_buffer_struct *buffers;
3211 if (trace_percpu_buffer)
3214 buffers = alloc_percpu(struct trace_buffer_struct);
3215 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3218 trace_percpu_buffer = buffers;
3222 static int buffers_allocated;
3224 void trace_printk_init_buffers(void)
3226 if (buffers_allocated)
3229 if (alloc_percpu_trace_buffer())
3232 /* trace_printk() is for debug use only. Don't use it in production. */
3235 pr_warn("**********************************************************\n");
3236 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3238 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3240 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3241 pr_warn("** unsafe for production use. **\n");
3243 pr_warn("** If you see this message and you are not debugging **\n");
3244 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3246 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3247 pr_warn("**********************************************************\n");
3249 /* Expand the buffers to set size */
3250 tracing_update_buffers();
3252 buffers_allocated = 1;
3255 * trace_printk_init_buffers() can be called by modules.
3256 * If that happens, then we need to start cmdline recording
3257 * directly here. If the global_trace.buffer is already
3258 * allocated here, then this was called by module code.
3260 if (global_trace.array_buffer.buffer)
3261 tracing_start_cmdline_record();
3263 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3265 void trace_printk_start_comm(void)
3267 /* Start tracing comms if trace printk is set */
3268 if (!buffers_allocated)
3270 tracing_start_cmdline_record();
3273 static void trace_printk_start_stop_comm(int enabled)
3275 if (!buffers_allocated)
3279 tracing_start_cmdline_record();
3281 tracing_stop_cmdline_record();
3285 * trace_vbprintk - write binary msg to tracing buffer
3286 * @ip: The address of the caller
3287 * @fmt: The string format to write to the buffer
3288 * @args: Arguments for @fmt
3290 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3292 struct trace_event_call *call = &event_bprint;
3293 struct ring_buffer_event *event;
3294 struct trace_buffer *buffer;
3295 struct trace_array *tr = &global_trace;
3296 struct bprint_entry *entry;
3297 unsigned int trace_ctx;
3301 if (unlikely(tracing_selftest_running || tracing_disabled))
3304 /* Don't pollute graph traces with trace_vprintk internals */
3305 pause_graph_tracing();
3307 trace_ctx = tracing_gen_ctx();
3308 preempt_disable_notrace();
3310 tbuffer = get_trace_buf();
3316 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3318 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3321 size = sizeof(*entry) + sizeof(u32) * len;
3322 buffer = tr->array_buffer.buffer;
3323 ring_buffer_nest_start(buffer);
3324 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3328 entry = ring_buffer_event_data(event);
3332 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3333 if (!call_filter_check_discard(call, entry, buffer, event)) {
3334 __buffer_unlock_commit(buffer, event);
3335 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3339 ring_buffer_nest_end(buffer);
3344 preempt_enable_notrace();
3345 unpause_graph_tracing();
3349 EXPORT_SYMBOL_GPL(trace_vbprintk);
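/*
 * The usual entry point is the trace_printk() macro; with a constant
 * format string and arguments it ends up here, e.g. (variable names are
 * illustrative):
 *
 *	trace_printk("read %d bytes from %s\n", len, name);
 *
 * Only the format pointer and the binary arguments are stored in the ring
 * buffer; the string is rendered when the trace is read.
 */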
3353 __trace_array_vprintk(struct trace_buffer *buffer,
3354 unsigned long ip, const char *fmt, va_list args)
3356 struct trace_event_call *call = &event_print;
3357 struct ring_buffer_event *event;
3359 struct print_entry *entry;
3360 unsigned int trace_ctx;
3363 if (tracing_disabled || tracing_selftest_running)
3366 /* Don't pollute graph traces with trace_vprintk internals */
3367 pause_graph_tracing();
3369 trace_ctx = tracing_gen_ctx();
3370 preempt_disable_notrace();
3373 tbuffer = get_trace_buf();
3379 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3381 size = sizeof(*entry) + len + 1;
3382 ring_buffer_nest_start(buffer);
3383 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3387 entry = ring_buffer_event_data(event);
3390 memcpy(&entry->buf, tbuffer, len + 1);
3391 if (!call_filter_check_discard(call, entry, buffer, event)) {
3392 __buffer_unlock_commit(buffer, event);
3393 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3397 ring_buffer_nest_end(buffer);
3401 preempt_enable_notrace();
3402 unpause_graph_tracing();
3408 int trace_array_vprintk(struct trace_array *tr,
3409 unsigned long ip, const char *fmt, va_list args)
3411 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3415 * trace_array_printk - Print a message to a specific instance
3416 * @tr: The instance trace_array descriptor
3417 * @ip: The instruction pointer that this is called from.
3418 * @fmt: The format to print (printf format)
3420 * If a subsystem sets up its own instance, they have the right to
3421 * printk strings into their tracing instance buffer using this
3422 * function. Note, this function will not write into the top level
3423 * buffer (use trace_printk() for that), as writing into the top level
3424 * buffer should only have events that can be individually disabled.
3425 * trace_printk() is only used for debugging a kernel, and should not
3426 * be ever incorporated in normal use.
3428 * trace_array_printk() can be used, as it will not add noise to the
3429 * top level tracing buffer.
3431 * Note, trace_array_init_printk() must be called on @tr before this
3435 int trace_array_printk(struct trace_array *tr,
3436 unsigned long ip, const char *fmt, ...)
3444 /* This is only allowed for created instances */
3445 if (tr == &global_trace)
3448 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3452 ret = trace_array_vprintk(tr, ip, fmt, ap);
3456 EXPORT_SYMBOL_GPL(trace_array_printk);
3459 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3460 * @tr: The trace array to initialize the buffers for
3462 * As trace_array_printk() only writes into instances, they are OK to
3463 * have in the kernel (unlike trace_printk()). This needs to be called
3464 * before trace_array_printk() can be used on a trace_array.
3466 int trace_array_init_printk(struct trace_array *tr)
3471 /* This is only allowed for created instances */
3472 if (tr == &global_trace)
3475 return alloc_percpu_trace_buffer();
3477 EXPORT_SYMBOL_GPL(trace_array_init_printk);
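/*
 * Combined usage sketch of the two APIs above (instance and variable
 * names are hypothetical):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * The reference taken by trace_array_get_by_name() must eventually be
 * dropped with trace_array_put().
 */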
3480 int trace_array_printk_buf(struct trace_buffer *buffer,
3481 unsigned long ip, const char *fmt, ...)
3486 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3490 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3496 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3498 return trace_array_vprintk(&global_trace, ip, fmt, args);
3500 EXPORT_SYMBOL_GPL(trace_vprintk);
3502 static void trace_iterator_increment(struct trace_iterator *iter)
3504 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3508 ring_buffer_iter_advance(buf_iter);
3511 static struct trace_entry *
3512 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3513 unsigned long *lost_events)
3515 struct ring_buffer_event *event;
3516 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3519 event = ring_buffer_iter_peek(buf_iter, ts);
3521 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3522 (unsigned long)-1 : 0;
3524 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3529 iter->ent_size = ring_buffer_event_length(event);
3530 return ring_buffer_event_data(event);
3536 static struct trace_entry *
3537 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3538 unsigned long *missing_events, u64 *ent_ts)
3540 struct trace_buffer *buffer = iter->array_buffer->buffer;
3541 struct trace_entry *ent, *next = NULL;
3542 unsigned long lost_events = 0, next_lost = 0;
3543 int cpu_file = iter->cpu_file;
3544 u64 next_ts = 0, ts;
3550 * If we are in a per_cpu trace file, don't bother by iterating over
3551 * all cpu and peek directly.
3553 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3554 if (ring_buffer_empty_cpu(buffer, cpu_file))
3556 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3558 *ent_cpu = cpu_file;
3563 for_each_tracing_cpu(cpu) {
3565 if (ring_buffer_empty_cpu(buffer, cpu))
3568 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3571 * Pick the entry with the smallest timestamp:
3573 if (ent && (!next || ts < next_ts)) {
3577 next_lost = lost_events;
3578 next_size = iter->ent_size;
3582 iter->ent_size = next_size;
3585 *ent_cpu = next_cpu;
3591 *missing_events = next_lost;
3596 #define STATIC_FMT_BUF_SIZE 128
3597 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3599 static char *trace_iter_expand_format(struct trace_iterator *iter)
3604 * iter->tr is NULL when used with tp_printk, which makes
3605 * this get called where it is not safe to call krealloc().
3607 if (!iter->tr || iter->fmt == static_fmt_buf)
3610 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3613 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3620 /* Returns true if the string is safe to dereference from an event */
3621 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3623 unsigned long addr = (unsigned long)str;
3624 struct trace_event *trace_event;
3625 struct trace_event_call *event;
3627 /* OK if part of the event data */
3628 if ((addr >= (unsigned long)iter->ent) &&
3629 (addr < (unsigned long)iter->ent + iter->ent_size))
3632 /* OK if part of the temp seq buffer */
3633 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3634 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3637 /* Core rodata can not be freed */
3638 if (is_kernel_rodata(addr))
3641 if (trace_is_tracepoint_string(str))
3645 * Now this could be a module event, referencing core module
3646 * data, which is OK.
3651 trace_event = ftrace_find_event(iter->ent->type);
3655 event = container_of(trace_event, struct trace_event_call, event);
3659 /* Would rather have rodata, but this will suffice */
3660 if (within_module_core(addr, event->mod))
3666 static const char *show_buffer(struct trace_seq *s)
3668 struct seq_buf *seq = &s->seq;
3670 seq_buf_terminate(seq);
3675 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3677 static int test_can_verify_check(const char *fmt, ...)
3684 * The verifier depends on vsnprintf() modifying the va_list
3685 * passed to it, where it is sent by reference. Some architectures
3686 * (like x86_32) pass it by value, which means that vsnprintf()
3687 * does not modify the va_list passed to it, and the verifier
3688 * would then need to be able to understand all the values that
3689 * vsnprintf can use. If it is passed by value, the verifier is disabled.
3693 vsnprintf(buf, 16, "%d", ap);
3694 ret = va_arg(ap, int);
3700 static void test_can_verify(void)
3702 if (!test_can_verify_check("%d %d", 0, 1)) {
3703 pr_info("trace event string verifier disabled\n");
3704 static_branch_inc(&trace_no_verify);
3709 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3710 * @iter: The iterator that holds the seq buffer and the event being printed
3711 * @fmt: The format used to print the event
3712 * @ap: The va_list holding the data to print from @fmt.
3714 * This writes the data into the @iter->seq buffer using the data from
3715 * @fmt and @ap. If the format has a %s, then the source of the string
3716 * is examined to make sure it is safe to print, otherwise it will
3717 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3720 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3723 const char *p = fmt;
3727 if (WARN_ON_ONCE(!fmt))
3730 if (static_branch_unlikely(&trace_no_verify))
3733 /* Don't bother checking when doing a ftrace_dump() */
3734 if (iter->fmt == static_fmt_buf)
3743 /* We only care about %s and variants */
3744 for (i = 0; p[i]; i++) {
3745 if (i + 1 >= iter->fmt_size) {
3747 * If we can't expand the copy buffer, just print it.
3750 if (!trace_iter_expand_format(iter))
3754 if (p[i] == '\\' && p[i+1]) {
3759 /* Need to test cases like %08.*s */
3760 for (j = 1; p[i+j]; j++) {
3761 if (isdigit(p[i+j]) ||
3764 if (p[i+j] == '*') {
3776 /* If no %s found then just print normally */
3780 /* Copy up to the %s, and print that */
3781 strncpy(iter->fmt, p, i);
3782 iter->fmt[i] = '\0';
3783 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3786 len = va_arg(ap, int);
3788 /* The ap now points to the string data of the %s */
3789 str = va_arg(ap, const char *);
3792 * If you hit this warning, it is likely that the
3793 * trace event in question used %s on a string that
3794 * was saved at the time of the event, but may not be
3795 * around when the trace is read. Use __string(),
3796 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3797 * instead. See samples/trace_events/trace-events-sample.h
3800 if (WARN_ONCE(!trace_safe_str(iter, str),
3801 "fmt: '%s' current_buffer: '%s'",
3802 fmt, show_buffer(&iter->seq))) {
3805 /* Try to safely read the string */
3807 if (len + 1 > iter->fmt_size)
3808 len = iter->fmt_size - 1;
3811 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3815 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3819 trace_seq_printf(&iter->seq, "(0x%px)", str);
3821 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3823 str = "[UNSAFE-MEMORY]";
3824 strcpy(iter->fmt, "%s");
3826 strncpy(iter->fmt, p + i, j + 1);
3827 iter->fmt[j+1] = '\0';
3830 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3832 trace_seq_printf(&iter->seq, iter->fmt, str);
3838 trace_seq_vprintf(&iter->seq, p, ap);
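/*
 * Illustration (hypothetical event) of what the check above catches: a
 * TRACE_EVENT() whose TP_printk() does something like
 *
 *	TP_printk("name=%s", __entry->name_ptr)
 *
 * where name_ptr is a bare pointer saved at event time may point at
 * memory that is long gone when the trace is read. The verifier prints
 * "[UNSAFE-MEMORY]" rather than dereferencing it; copying the string with
 * __string()/__assign_str()/__get_str() avoids the warning entirely.
 */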
3841 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3843 const char *p, *new_fmt;
3846 if (WARN_ON_ONCE(!fmt))
3849 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3853 new_fmt = q = iter->fmt;
3855 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3856 if (!trace_iter_expand_format(iter))
3859 q += iter->fmt - new_fmt;
3860 new_fmt = iter->fmt;
3865 /* Replace %p with %px */
3869 } else if (p[0] == 'p' && !isalnum(p[1])) {
3880 #define STATIC_TEMP_BUF_SIZE 128
3881 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3883 /* Find the next real entry, without updating the iterator itself */
3884 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3885 int *ent_cpu, u64 *ent_ts)
3887 /* __find_next_entry will reset ent_size */
3888 int ent_size = iter->ent_size;
3889 struct trace_entry *entry;
3892 * If called from ftrace_dump(), then the iter->temp buffer
3893 * will be the static_temp_buf and not created from kmalloc.
3894 * If the entry size is greater than the buffer, we can
3895 * not save it. Just return NULL in that case. This is only
3896 * used to add markers when two consecutive events' time
3897 * stamps have a large delta. See trace_print_lat_context()
3899 if (iter->temp == static_temp_buf &&
3900 STATIC_TEMP_BUF_SIZE < ent_size)
3904 * The __find_next_entry() may call peek_next_entry(), which may
3905 * call ring_buffer_peek() that may make the contents of iter->ent
3906 * undefined. Need to copy iter->ent now.
3908 if (iter->ent && iter->ent != iter->temp) {
3909 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3910 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3912 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3917 iter->temp_size = iter->ent_size;
3919 memcpy(iter->temp, iter->ent, iter->ent_size);
3920 iter->ent = iter->temp;
3922 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3923 /* Put back the original ent_size */
3924 iter->ent_size = ent_size;
3929 /* Find the next real entry, and increment the iterator to the next entry */
3930 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3932 iter->ent = __find_next_entry(iter, &iter->cpu,
3933 &iter->lost_events, &iter->ts);
3936 trace_iterator_increment(iter);
3938 return iter->ent ? iter : NULL;
3941 static void trace_consume(struct trace_iterator *iter)
3943 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3944 &iter->lost_events);
3947 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3949 struct trace_iterator *iter = m->private;
3953 WARN_ON_ONCE(iter->leftover);
3957 /* can't go backwards */
3962 ent = trace_find_next_entry_inc(iter);
3966 while (ent && iter->idx < i)
3967 ent = trace_find_next_entry_inc(iter);
3974 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3976 struct ring_buffer_iter *buf_iter;
3977 unsigned long entries = 0;
3980 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3982 buf_iter = trace_buffer_iter(iter, cpu);
3986 ring_buffer_iter_reset(buf_iter);
3989 * We could have the case with the max latency tracers
3990 * that a reset never took place on a cpu. This is evident
3991 * by the timestamp being before the start of the buffer.
3993 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3994 if (ts >= iter->array_buffer->time_start)
3997 ring_buffer_iter_advance(buf_iter);
4000 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4004 * The current tracer is copied to avoid global locking all around.
4007 static void *s_start(struct seq_file *m, loff_t *pos)
4009 struct trace_iterator *iter = m->private;
4010 struct trace_array *tr = iter->tr;
4011 int cpu_file = iter->cpu_file;
4017 * copy the tracer to avoid using a global lock all around.
4018 * iter->trace is a copy of current_trace; the pointer to the
4019 * name may be used instead of a strcmp(), as iter->trace->name
4020 * will point to the same string as current_trace->name.
4022 mutex_lock(&trace_types_lock);
4023 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4024 *iter->trace = *tr->current_trace;
4025 mutex_unlock(&trace_types_lock);
4027 #ifdef CONFIG_TRACER_MAX_TRACE
4028 if (iter->snapshot && iter->trace->use_max_tr)
4029 return ERR_PTR(-EBUSY);
4032 if (*pos != iter->pos) {
4037 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4038 for_each_tracing_cpu(cpu)
4039 tracing_iter_reset(iter, cpu);
4041 tracing_iter_reset(iter, cpu_file);
4044 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4049 * If we overflowed the seq_file before, then we want
4050 * to just reuse the trace_seq buffer again.
4056 p = s_next(m, p, &l);
4060 trace_event_read_lock();
4061 trace_access_lock(cpu_file);
4065 static void s_stop(struct seq_file *m, void *p)
4067 struct trace_iterator *iter = m->private;
4069 #ifdef CONFIG_TRACER_MAX_TRACE
4070 if (iter->snapshot && iter->trace->use_max_tr)
4074 trace_access_unlock(iter->cpu_file);
4075 trace_event_read_unlock();
4079 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4080 unsigned long *entries, int cpu)
4082 unsigned long count;
4084 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4086 * If this buffer has skipped entries, then we hold all
4087 * entries for the trace and we need to ignore the
4088 * ones before the time stamp.
4090 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4091 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4092 /* total is the same as the entries */
4096 ring_buffer_overrun_cpu(buf->buffer, cpu);
4101 get_total_entries(struct array_buffer *buf,
4102 unsigned long *total, unsigned long *entries)
4110 for_each_tracing_cpu(cpu) {
4111 get_total_entries_cpu(buf, &t, &e, cpu);
4117 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4119 unsigned long total, entries;
4124 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4129 unsigned long trace_total_entries(struct trace_array *tr)
4131 unsigned long total, entries;
4136 get_total_entries(&tr->array_buffer, &total, &entries);
4141 static void print_lat_help_header(struct seq_file *m)
4143 seq_puts(m, "# _------=> CPU# \n"
4144 "# / _-----=> irqs-off \n"
4145 "# | / _----=> need-resched \n"
4146 "# || / _---=> hardirq/softirq \n"
4147 "# ||| / _--=> preempt-depth \n"
4149 "# cmd pid ||||| time | caller \n"
4150 "# \\ / ||||| \\ | / \n");
4153 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4155 unsigned long total;
4156 unsigned long entries;
4158 get_total_entries(buf, &total, &entries);
4159 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4160 entries, total, num_online_cpus());
4164 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4167 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4169 print_event_info(buf, m);
4171 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4172 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4175 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4178 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4179 const char *space = " ";
4180 int prec = tgid ? 12 : 2;
4182 print_event_info(buf, m);
4184 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4185 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4186 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4187 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4188 seq_printf(m, "# %.*s||| / delay\n", prec, space);
4189 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4190 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
4194 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4196 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4197 struct array_buffer *buf = iter->array_buffer;
4198 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4199 struct tracer *type = iter->trace;
4200 unsigned long entries;
4201 unsigned long total;
4202 const char *name = "preemption";
4206 get_total_entries(buf, &total, &entries);
4208 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4210 seq_puts(m, "# -----------------------------------"
4211 "---------------------------------\n");
4212 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4213 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4214 nsecs_to_usecs(data->saved_latency),
4218 #if defined(CONFIG_PREEMPT_NONE)
4220 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4222 #elif defined(CONFIG_PREEMPT)
4224 #elif defined(CONFIG_PREEMPT_RT)
4229 /* These are reserved for later use */
4232 seq_printf(m, " #P:%d)\n", num_online_cpus());
4236 seq_puts(m, "# -----------------\n");
4237 seq_printf(m, "# | task: %.16s-%d "
4238 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4239 data->comm, data->pid,
4240 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4241 data->policy, data->rt_priority);
4242 seq_puts(m, "# -----------------\n");
4244 if (data->critical_start) {
4245 seq_puts(m, "# => started at: ");
4246 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4247 trace_print_seq(m, &iter->seq);
4248 seq_puts(m, "\n# => ended at: ");
4249 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4250 trace_print_seq(m, &iter->seq);
4251 seq_puts(m, "\n#\n");
4257 static void test_cpu_buff_start(struct trace_iterator *iter)
4259 struct trace_seq *s = &iter->seq;
4260 struct trace_array *tr = iter->tr;
4262 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4265 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4268 if (cpumask_available(iter->started) &&
4269 cpumask_test_cpu(iter->cpu, iter->started))
4272 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4275 if (cpumask_available(iter->started))
4276 cpumask_set_cpu(iter->cpu, iter->started);
4278 /* Don't print started cpu buffer for the first entry of the trace */
4280 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4284 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4286 struct trace_array *tr = iter->tr;
4287 struct trace_seq *s = &iter->seq;
4288 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4289 struct trace_entry *entry;
4290 struct trace_event *event;
4294 test_cpu_buff_start(iter);
4296 event = ftrace_find_event(entry->type);
4298 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4299 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4300 trace_print_lat_context(iter);
4302 trace_print_context(iter);
4305 if (trace_seq_has_overflowed(s))
4306 return TRACE_TYPE_PARTIAL_LINE;
4309 return event->funcs->trace(iter, sym_flags, event);
4311 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4313 return trace_handle_return(s);
4316 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4318 struct trace_array *tr = iter->tr;
4319 struct trace_seq *s = &iter->seq;
4320 struct trace_entry *entry;
4321 struct trace_event *event;
4325 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4326 trace_seq_printf(s, "%d %d %llu ",
4327 entry->pid, iter->cpu, iter->ts);
4329 if (trace_seq_has_overflowed(s))
4330 return TRACE_TYPE_PARTIAL_LINE;
4332 event = ftrace_find_event(entry->type);
4334 return event->funcs->raw(iter, 0, event);
4336 trace_seq_printf(s, "%d ?\n", entry->type);
4338 return trace_handle_return(s);
4341 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4343 struct trace_array *tr = iter->tr;
4344 struct trace_seq *s = &iter->seq;
4345 unsigned char newline = '\n';
4346 struct trace_entry *entry;
4347 struct trace_event *event;
4351 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4352 SEQ_PUT_HEX_FIELD(s, entry->pid);
4353 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4354 SEQ_PUT_HEX_FIELD(s, iter->ts);
4355 if (trace_seq_has_overflowed(s))
4356 return TRACE_TYPE_PARTIAL_LINE;
4359 event = ftrace_find_event(entry->type);
4361 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4362 if (ret != TRACE_TYPE_HANDLED)
4366 SEQ_PUT_FIELD(s, newline);
4368 return trace_handle_return(s);
4371 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4373 struct trace_array *tr = iter->tr;
4374 struct trace_seq *s = &iter->seq;
4375 struct trace_entry *entry;
4376 struct trace_event *event;
4380 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4381 SEQ_PUT_FIELD(s, entry->pid);
4382 SEQ_PUT_FIELD(s, iter->cpu);
4383 SEQ_PUT_FIELD(s, iter->ts);
4384 if (trace_seq_has_overflowed(s))
4385 return TRACE_TYPE_PARTIAL_LINE;
4388 event = ftrace_find_event(entry->type);
4389 return event ? event->funcs->binary(iter, 0, event) :
4393 int trace_empty(struct trace_iterator *iter)
4395 struct ring_buffer_iter *buf_iter;
4398 /* If we are looking at one CPU buffer, only check that one */
4399 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4400 cpu = iter->cpu_file;
4401 buf_iter = trace_buffer_iter(iter, cpu);
4403 if (!ring_buffer_iter_empty(buf_iter))
4406 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4412 for_each_tracing_cpu(cpu) {
4413 buf_iter = trace_buffer_iter(iter, cpu);
4415 if (!ring_buffer_iter_empty(buf_iter))
4418 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4426 /* Called with trace_event_read_lock() held. */
4427 enum print_line_t print_trace_line(struct trace_iterator *iter)
4429 struct trace_array *tr = iter->tr;
4430 unsigned long trace_flags = tr->trace_flags;
4431 enum print_line_t ret;
4433 if (iter->lost_events) {
4434 if (iter->lost_events == (unsigned long)-1)
4435 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4438 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4439 iter->cpu, iter->lost_events);
4440 if (trace_seq_has_overflowed(&iter->seq))
4441 return TRACE_TYPE_PARTIAL_LINE;
4444 if (iter->trace && iter->trace->print_line) {
4445 ret = iter->trace->print_line(iter);
4446 if (ret != TRACE_TYPE_UNHANDLED)
4450 if (iter->ent->type == TRACE_BPUTS &&
4451 trace_flags & TRACE_ITER_PRINTK &&
4452 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4453 return trace_print_bputs_msg_only(iter);
4455 if (iter->ent->type == TRACE_BPRINT &&
4456 trace_flags & TRACE_ITER_PRINTK &&
4457 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4458 return trace_print_bprintk_msg_only(iter);
4460 if (iter->ent->type == TRACE_PRINT &&
4461 trace_flags & TRACE_ITER_PRINTK &&
4462 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4463 return trace_print_printk_msg_only(iter);
4465 if (trace_flags & TRACE_ITER_BIN)
4466 return print_bin_fmt(iter);
4468 if (trace_flags & TRACE_ITER_HEX)
4469 return print_hex_fmt(iter);
4471 if (trace_flags & TRACE_ITER_RAW)
4472 return print_raw_fmt(iter);
4474 return print_trace_fmt(iter);
4477 void trace_latency_header(struct seq_file *m)
4479 struct trace_iterator *iter = m->private;
4480 struct trace_array *tr = iter->tr;
4482 /* print nothing if the buffers are empty */
4483 if (trace_empty(iter))
4486 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4487 print_trace_header(m, iter);
4489 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4490 print_lat_help_header(m);
4493 void trace_default_header(struct seq_file *m)
4495 struct trace_iterator *iter = m->private;
4496 struct trace_array *tr = iter->tr;
4497 unsigned long trace_flags = tr->trace_flags;
4499 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4502 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4503 /* print nothing if the buffers are empty */
4504 if (trace_empty(iter))
4506 print_trace_header(m, iter);
4507 if (!(trace_flags & TRACE_ITER_VERBOSE))
4508 print_lat_help_header(m);
4510 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4511 if (trace_flags & TRACE_ITER_IRQ_INFO)
4512 print_func_help_header_irq(iter->array_buffer,
4515 print_func_help_header(iter->array_buffer, m,
4521 static void test_ftrace_alive(struct seq_file *m)
4523 if (!ftrace_is_dead())
4525 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4526 "# MAY BE MISSING FUNCTION EVENTS\n");
4529 #ifdef CONFIG_TRACER_MAX_TRACE
4530 static void show_snapshot_main_help(struct seq_file *m)
4532 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4533 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4534 "# Takes a snapshot of the main buffer.\n"
4535 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4536 "# (Doesn't have to be '2' works with any number that\n"
4537 "# is not a '0' or '1')\n");
4540 static void show_snapshot_percpu_help(struct seq_file *m)
4542 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4543 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4544 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4545 "# Takes a snapshot of the main buffer for this cpu.\n");
4547 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4548 "# Must use main snapshot file to allocate.\n");
4550 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4551 "# (Doesn't have to be '2' works with any number that\n"
4552 "# is not a '0' or '1')\n");
4555 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4557 if (iter->tr->allocated_snapshot)
4558 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4560 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4562 seq_puts(m, "# Snapshot commands:\n");
4563 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4564 show_snapshot_main_help(m);
4566 show_snapshot_percpu_help(m);
4569 /* Should never be called */
4570 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4573 static int s_show(struct seq_file *m, void *v)
4575 struct trace_iterator *iter = v;
4578 if (iter->ent == NULL) {
4580 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4582 test_ftrace_alive(m);
4584 if (iter->snapshot && trace_empty(iter))
4585 print_snapshot_help(m, iter);
4586 else if (iter->trace && iter->trace->print_header)
4587 iter->trace->print_header(m);
4589 trace_default_header(m);
4591 } else if (iter->leftover) {
4593 * If we filled the seq_file buffer earlier, we
4594 * want to just show it now.
4596 ret = trace_print_seq(m, &iter->seq);
4598 /* ret should this time be zero, but you never know */
4599 iter->leftover = ret;
4602 print_trace_line(iter);
4603 ret = trace_print_seq(m, &iter->seq);
4605 * If we overflow the seq_file buffer, then it will
4606 * ask us for this data again at start up.
4608 * ret is 0 if seq_file write succeeded.
4611 iter->leftover = ret;
4618 * Should be used after trace_array_get(); trace_types_lock
4619 * ensures that i_cdev was already initialized.
4621 static inline int tracing_get_cpu(struct inode *inode)
4623 if (inode->i_cdev) /* See trace_create_cpu_file() */
4624 return (long)inode->i_cdev - 1;
4625 return RING_BUFFER_ALL_CPUS;
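/*
 * Illustration: trace_create_cpu_file() stores (cpu + 1) in i_cdev, so a
 * NULL i_cdev (the top-level files) decodes to RING_BUFFER_ALL_CPUS here
 * and the per_cpu/cpuN files decode back to N via the subtraction above.
 */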
4628 static const struct seq_operations tracer_seq_ops = {
4635 static struct trace_iterator *
4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4638 struct trace_array *tr = inode->i_private;
4639 struct trace_iterator *iter;
4642 if (tracing_disabled)
4643 return ERR_PTR(-ENODEV);
4645 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4647 return ERR_PTR(-ENOMEM);
4649 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4651 if (!iter->buffer_iter)
4655 * trace_find_next_entry() may need to save off iter->ent.
4656 * It will place it into the iter->temp buffer. As most
4657 * events are less than 128, allocate a buffer of that size.
4658 * If one is greater, then trace_find_next_entry() will
4659 * allocate a new buffer to adjust for the bigger iter->ent.
4660 * It's not critical if it fails to get allocated here.
4662 iter->temp = kmalloc(128, GFP_KERNEL);
4664 iter->temp_size = 128;
4667 * trace_event_printf() may need to modify the given format
4668 * string to replace %p with %px so that it shows the real address
4669 * instead of a hash value. However, that is only needed for event
4670 * tracing; other tracers may not need it. Defer the allocation
4671 * until it is needed.
4677 * We make a copy of the current tracer to avoid concurrent
4678 * changes on it while we are reading.
4680 mutex_lock(&trace_types_lock);
4681 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4685 *iter->trace = *tr->current_trace;
4687 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4692 #ifdef CONFIG_TRACER_MAX_TRACE
4693 /* Currently only the top directory has a snapshot */
4694 if (tr->current_trace->print_max || snapshot)
4695 iter->array_buffer = &tr->max_buffer;
4698 iter->array_buffer = &tr->array_buffer;
4699 iter->snapshot = snapshot;
4701 iter->cpu_file = tracing_get_cpu(inode);
4702 mutex_init(&iter->mutex);
4704 /* Notify the tracer early; before we stop tracing. */
4705 if (iter->trace->open)
4706 iter->trace->open(iter);
4708 /* Annotate start of buffers if we had overruns */
4709 if (ring_buffer_overruns(iter->array_buffer->buffer))
4710 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4712 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4713 if (trace_clocks[tr->clock_id].in_ns)
4714 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4717 * If pause-on-trace is enabled, then stop the trace while
4718 * dumping, unless this is the "snapshot" file
4720 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4721 tracing_stop_tr(tr);
4723 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4724 for_each_tracing_cpu(cpu) {
4725 iter->buffer_iter[cpu] =
4726 ring_buffer_read_prepare(iter->array_buffer->buffer,
4729 ring_buffer_read_prepare_sync();
4730 for_each_tracing_cpu(cpu) {
4731 ring_buffer_read_start(iter->buffer_iter[cpu]);
4732 tracing_iter_reset(iter, cpu);
4735 cpu = iter->cpu_file;
4736 iter->buffer_iter[cpu] =
4737 ring_buffer_read_prepare(iter->array_buffer->buffer,
4739 ring_buffer_read_prepare_sync();
4740 ring_buffer_read_start(iter->buffer_iter[cpu]);
4741 tracing_iter_reset(iter, cpu);
4744 mutex_unlock(&trace_types_lock);
4749 mutex_unlock(&trace_types_lock);
4752 kfree(iter->buffer_iter);
4754 seq_release_private(inode, file);
4755 return ERR_PTR(-ENOMEM);
4758 int tracing_open_generic(struct inode *inode, struct file *filp)
4762 ret = tracing_check_open_get_tr(NULL);
4766 filp->private_data = inode->i_private;
4770 bool tracing_is_disabled(void)
4772 return (tracing_disabled) ? true: false;
4776 * Open and update trace_array ref count.
4777 * Must have the current trace_array passed to it.
4779 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4781 struct trace_array *tr = inode->i_private;
4784 ret = tracing_check_open_get_tr(tr);
4788 filp->private_data = inode->i_private;
4793 static int tracing_release(struct inode *inode, struct file *file)
4795 struct trace_array *tr = inode->i_private;
4796 struct seq_file *m = file->private_data;
4797 struct trace_iterator *iter;
4800 if (!(file->f_mode & FMODE_READ)) {
4801 trace_array_put(tr);
4805 /* Writes do not use seq_file */
4807 mutex_lock(&trace_types_lock);
4809 for_each_tracing_cpu(cpu) {
4810 if (iter->buffer_iter[cpu])
4811 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4814 if (iter->trace && iter->trace->close)
4815 iter->trace->close(iter);
4817 if (!iter->snapshot && tr->stop_count)
4818 /* reenable tracing if it was previously enabled */
4819 tracing_start_tr(tr);
4821 __trace_array_put(tr);
4823 mutex_unlock(&trace_types_lock);
4825 mutex_destroy(&iter->mutex);
4826 free_cpumask_var(iter->started);
4830 kfree(iter->buffer_iter);
4831 seq_release_private(inode, file);
4836 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4838 struct trace_array *tr = inode->i_private;
4840 trace_array_put(tr);
4844 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4846 struct trace_array *tr = inode->i_private;
4848 trace_array_put(tr);
4850 return single_release(inode, file);
4853 static int tracing_open(struct inode *inode, struct file *file)
4855 struct trace_array *tr = inode->i_private;
4856 struct trace_iterator *iter;
4859 ret = tracing_check_open_get_tr(tr);
4863 /* If this file was open for write, then erase contents */
4864 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4865 int cpu = tracing_get_cpu(inode);
4866 struct array_buffer *trace_buf = &tr->array_buffer;
4868 #ifdef CONFIG_TRACER_MAX_TRACE
4869 if (tr->current_trace->print_max)
4870 trace_buf = &tr->max_buffer;
		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);
4879 if (file->f_mode & FMODE_READ) {
4880 iter = __tracing_open(inode, file, false);
4882 ret = PTR_ERR(iter);
4883 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4884 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4888 trace_array_put(tr);
4894 * Some tracers are not suitable for instance buffers.
4895 * A tracer is always available for the global array (toplevel)
4896 * or if it explicitly states that it is.
4899 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4901 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4904 /* Find the next tracer that this trace array may use */
4905 static struct tracer *
4906 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4908 while (t && !trace_ok_for_array(t, tr))
4915 t_next(struct seq_file *m, void *v, loff_t *pos)
4917 struct trace_array *tr = m->private;
4918 struct tracer *t = v;
4923 t = get_tracer_for_array(tr, t->next);
4928 static void *t_start(struct seq_file *m, loff_t *pos)
4930 struct trace_array *tr = m->private;
4934 mutex_lock(&trace_types_lock);
4936 t = get_tracer_for_array(tr, trace_types);
4937 for (; t && l < *pos; t = t_next(m, t, &l))
4943 static void t_stop(struct seq_file *m, void *p)
4945 mutex_unlock(&trace_types_lock);
4948 static int t_show(struct seq_file *m, void *v)
4950 struct tracer *t = v;
4955 seq_puts(m, t->name);
4964 static const struct seq_operations show_traces_seq_ops = {
4971 static int show_traces_open(struct inode *inode, struct file *file)
4973 struct trace_array *tr = inode->i_private;
4977 ret = tracing_check_open_get_tr(tr);
4981 ret = seq_open(file, &show_traces_seq_ops);
4983 trace_array_put(tr);
4987 m = file->private_data;
4993 static int show_traces_release(struct inode *inode, struct file *file)
4995 struct trace_array *tr = inode->i_private;
4997 trace_array_put(tr);
4998 return seq_release(inode, file);
5002 tracing_write_stub(struct file *filp, const char __user *ubuf,
5003 size_t count, loff_t *ppos)
5008 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5012 if (file->f_mode & FMODE_READ)
5013 ret = seq_lseek(file, offset, whence);
5015 file->f_pos = ret = 0;
5020 static const struct file_operations tracing_fops = {
5021 .open = tracing_open,
5023 .write = tracing_write_stub,
5024 .llseek = tracing_lseek,
5025 .release = tracing_release,
5028 static const struct file_operations show_traces_fops = {
5029 .open = show_traces_open,
5031 .llseek = seq_lseek,
5032 .release = show_traces_release,
5036 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5037 size_t count, loff_t *ppos)
5039 struct trace_array *tr = file_inode(filp)->i_private;
5043 len = snprintf(NULL, 0, "%*pb\n",
5044 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5045 mask_str = kmalloc(len, GFP_KERNEL);
5049 len = snprintf(mask_str, len, "%*pb\n",
5050 cpumask_pr_args(tr->tracing_cpumask));
5055 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
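/*
 * Apply a new tracing cpumask. CPUs cleared from the mask get their
 * per-CPU "disabled" count raised and ring buffer recording stopped;
 * CPUs added back are re-enabled. The update is done under
 * tr->max_lock with interrupts off.
 */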
5063 int tracing_set_cpumask(struct trace_array *tr,
5064 cpumask_var_t tracing_cpumask_new)
5071 local_irq_disable();
5072 arch_spin_lock(&tr->max_lock);
5073 for_each_tracing_cpu(cpu) {
5075 * Increase/decrease the disabled counter if we are
5076 * about to flip a bit in the cpumask:
5078 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5079 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5080 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5081 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5083 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5084 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5085 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5086 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5089 arch_spin_unlock(&tr->max_lock);
5092 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5098 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5099 size_t count, loff_t *ppos)
5101 struct trace_array *tr = file_inode(filp)->i_private;
5102 cpumask_var_t tracing_cpumask_new;
5105 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5108 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5112 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5116 free_cpumask_var(tracing_cpumask_new);
5121 free_cpumask_var(tracing_cpumask_new);
5126 static const struct file_operations tracing_cpumask_fops = {
5127 .open = tracing_open_generic_tr,
5128 .read = tracing_cpumask_read,
5129 .write = tracing_cpumask_write,
5130 .release = tracing_release_generic_tr,
5131 .llseek = generic_file_llseek,
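/*
 * Illustrative use of the tracing_cpumask file from user space (not part
 * of the original source; assumes the usual tracefs mount point):
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 *   # cat /sys/kernel/tracing/tracing_cpumask
 */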
5134 static int tracing_trace_options_show(struct seq_file *m, void *v)
5136 struct tracer_opt *trace_opts;
5137 struct trace_array *tr = m->private;
5141 mutex_lock(&trace_types_lock);
5142 tracer_flags = tr->current_trace->flags->val;
5143 trace_opts = tr->current_trace->flags->opts;
5145 for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1 << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}
5158 mutex_unlock(&trace_types_lock);
5163 static int __set_tracer_option(struct trace_array *tr,
5164 struct tracer_flags *tracer_flags,
5165 struct tracer_opt *opts, int neg)
5167 struct tracer *trace = tracer_flags->trace;
5170 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5175 tracer_flags->val &= ~opts->bit;
5177 tracer_flags->val |= opts->bit;
5181 /* Try to assign a tracer specific option */
5182 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5184 struct tracer *trace = tr->current_trace;
5185 struct tracer_flags *tracer_flags = trace->flags;
5186 struct tracer_opt *opts = NULL;
5189 for (i = 0; tracer_flags->opts[i].name; i++) {
5190 opts = &tracer_flags->opts[i];
5192 if (strcmp(cmp, opts->name) == 0)
5193 return __set_tracer_option(tr, trace->flags, opts, neg);
5199 /* Some tracers require overwrite to stay enabled */
5200 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5202 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5208 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5212 if ((mask == TRACE_ITER_RECORD_TGID) ||
5213 (mask == TRACE_ITER_RECORD_CMD))
5214 lockdep_assert_held(&event_mutex);
5216 /* do nothing if flag is already set */
5217 if (!!(tr->trace_flags & mask) == !!enabled)
5220 /* Give the tracer a chance to approve the change */
5221 if (tr->current_trace->flag_changed)
5222 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5226 tr->trace_flags |= mask;
5228 tr->trace_flags &= ~mask;
5230 if (mask == TRACE_ITER_RECORD_CMD)
5231 trace_event_enable_cmd_record(enabled);
5233 if (mask == TRACE_ITER_RECORD_TGID) {
5235 tgid_map_max = pid_max;
5236 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5240 * Pairs with smp_load_acquire() in
5241 * trace_find_tgid_ptr() to ensure that if it observes
5242 * the tgid_map we just allocated then it also observes
5243 * the corresponding tgid_map_max value.
5245 smp_store_release(&tgid_map, map);
5248 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5252 trace_event_enable_tgid_record(enabled);
5255 if (mask == TRACE_ITER_EVENT_FORK)
5256 trace_event_follow_fork(tr, enabled);
5258 if (mask == TRACE_ITER_FUNC_FORK)
5259 ftrace_pid_follow_fork(tr, enabled);
5261 if (mask == TRACE_ITER_OVERWRITE) {
5262 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5263 #ifdef CONFIG_TRACER_MAX_TRACE
5264 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5268 if (mask == TRACE_ITER_PRINTK) {
5269 trace_printk_start_stop_comm(enabled);
5270 trace_printk_control(enabled);
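/*
 * trace_set_options() applies a single option token such as
 * "print-parent", or a "no"-prefixed form to clear it. Illustrative
 * usage from user space (assumes the usual tracefs mount point):
 *
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 */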
5276 int trace_set_options(struct trace_array *tr, char *option)
5281 size_t orig_len = strlen(option);
5284 cmp = strstrip(option);
5286 len = str_has_prefix(cmp, "no");
5292 mutex_lock(&event_mutex);
5293 mutex_lock(&trace_types_lock);
5295 ret = match_string(trace_options, -1, cmp);
5296 /* If no option could be set, test the specific tracer options */
5298 ret = set_tracer_option(tr, cmp, neg);
5300 ret = set_tracer_flag(tr, 1 << ret, !neg);
5302 mutex_unlock(&trace_types_lock);
5303 mutex_unlock(&event_mutex);
5306 * If the first trailing whitespace is replaced with '\0' by strstrip,
5307 * turn it back into a space.
5309 if (orig_len > strlen(option))
5310 option[strlen(option)] = ' ';
5315 static void __init apply_trace_boot_options(void)
5317 char *buf = trace_boot_options_buf;
5321 option = strsep(&buf, ",");
5327 trace_set_options(&global_trace, option);
5329 /* Put back the comma to allow this to be called again */
5336 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5337 size_t cnt, loff_t *ppos)
5339 struct seq_file *m = filp->private_data;
5340 struct trace_array *tr = m->private;
5344 if (cnt >= sizeof(buf))
5347 if (copy_from_user(buf, ubuf, cnt))
5352 ret = trace_set_options(tr, buf);
5361 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5363 struct trace_array *tr = inode->i_private;
5366 ret = tracing_check_open_get_tr(tr);
5370 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5372 trace_array_put(tr);
5377 static const struct file_operations tracing_iter_fops = {
5378 .open = tracing_trace_options_open,
5380 .llseek = seq_lseek,
5381 .release = tracing_single_release_tr,
5382 .write = tracing_trace_options_write,
5385 static const char readme_msg[] =
5386 "tracing mini-HOWTO:\n\n"
5387 "# echo 0 > tracing_on : quick way to disable tracing\n"
5388 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5389 " Important files:\n"
5390 " trace\t\t\t- The static contents of the buffer\n"
5391 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5392 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5393 " current_tracer\t- function and latency tracers\n"
5394 " available_tracers\t- list of configured tracers for current_tracer\n"
5395 " error_log\t- error log for failed commands (that support it)\n"
5396 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5397 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5398 " trace_clock\t\t-change the clock used to order events\n"
5399 " local: Per cpu clock but may not be synced across CPUs\n"
5400 " global: Synced across CPUs but slows tracing down.\n"
5401 " counter: Not a clock, but just an increment\n"
5402 " uptime: Jiffy counter from time of boot\n"
5403 " perf: Same clock that perf events use\n"
5404 #ifdef CONFIG_X86_64
5405 " x86-tsc: TSC cycle counter\n"
5407 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5408 " delta: Delta difference against a buffer-wide timestamp\n"
5409 " absolute: Absolute (standalone) timestamp\n"
5410 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5411 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5412 " tracing_cpumask\t- Limit which CPUs to trace\n"
5413 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5414 "\t\t\t Remove sub-buffer with rmdir\n"
5415 " trace_options\t\t- Set format or modify how tracing happens\n"
5416 "\t\t\t Disable an option by prefixing 'no' to the\n"
5417 "\t\t\t option name\n"
5418 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5419 #ifdef CONFIG_DYNAMIC_FTRACE
5420 "\n available_filter_functions - list of functions that can be filtered on\n"
5421 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5422 "\t\t\t functions\n"
5423 "\t accepts: func_full_name or glob-matching-pattern\n"
5424 "\t modules: Can select a group via module\n"
5425 "\t Format: :mod:<module-name>\n"
5426 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5427 "\t triggers: a command to perform when function is hit\n"
5428 "\t Format: <function>:<trigger>[:count]\n"
5429 "\t trigger: traceon, traceoff\n"
5430 "\t\t enable_event:<system>:<event>\n"
5431 "\t\t disable_event:<system>:<event>\n"
5432 #ifdef CONFIG_STACKTRACE
5435 #ifdef CONFIG_TRACER_SNAPSHOT
5440 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5441 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5442 "\t The first one will disable tracing every time do_fault is hit\n"
5443 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5444 "\t The first time do trap is hit and it disables tracing, the\n"
5445 "\t counter will decrement to 2. If tracing is already disabled,\n"
5446 "\t the counter will not decrement. It only decrements when the\n"
5447 "\t trigger did work\n"
5448 "\t To remove trigger without count:\n"
5449 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5450 "\t To remove trigger with a count:\n"
5451 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5452 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5453 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5454 "\t modules: Can select a group via module command :mod:\n"
5455 "\t Does not accept triggers\n"
5456 #endif /* CONFIG_DYNAMIC_FTRACE */
5457 #ifdef CONFIG_FUNCTION_TRACER
5458 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5460 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5463 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5464 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5465 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5466 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5468 #ifdef CONFIG_TRACER_SNAPSHOT
5469 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5470 "\t\t\t snapshot buffer. Read the contents for more\n"
5471 "\t\t\t information\n"
5473 #ifdef CONFIG_STACK_TRACER
5474 " stack_trace\t\t- Shows the max stack trace when active\n"
5475 " stack_max_size\t- Shows current max stack size that was traced\n"
5476 "\t\t\t Write into this file to reset the max size (trigger a\n"
5477 "\t\t\t new trace)\n"
5478 #ifdef CONFIG_DYNAMIC_FTRACE
5479 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5482 #endif /* CONFIG_STACK_TRACER */
5483 #ifdef CONFIG_DYNAMIC_EVENTS
5484 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5485 "\t\t\t Write into this file to define/undefine new trace events.\n"
5487 #ifdef CONFIG_KPROBE_EVENTS
5488 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5489 "\t\t\t Write into this file to define/undefine new trace events.\n"
5491 #ifdef CONFIG_UPROBE_EVENTS
5492 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5493 "\t\t\t Write into this file to define/undefine new trace events.\n"
5495 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5496 "\t accepts: event-definitions (one definition per line)\n"
5497 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5498 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5499 #ifdef CONFIG_HIST_TRIGGERS
5500 "\t s:[synthetic/]<event> <field> [<field>]\n"
5502 "\t -:[<group>/]<event>\n"
5503 #ifdef CONFIG_KPROBE_EVENTS
5504 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5505 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5507 #ifdef CONFIG_UPROBE_EVENTS
5508 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5510 "\t args: <name>=fetcharg[:type]\n"
5511 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5512 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5513 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5515 "\t $stack<index>, $stack, $retval, $comm,\n"
5517 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5518 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5519 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5520 "\t <type>\\[<array-size>\\]\n"
5521 #ifdef CONFIG_HIST_TRIGGERS
5522 "\t field: <stype> <name>;\n"
5523 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5524 "\t [unsigned] char/int/long\n"
5527 " events/\t\t- Directory containing all trace event subsystems:\n"
5528 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5529 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5530 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5532 " filter\t\t- If set, only events passing filter are traced\n"
5533 " events/<system>/<event>/\t- Directory containing control files for\n"
5535 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5536 " filter\t\t- If set, only events passing filter are traced\n"
5537 " trigger\t\t- If set, a command to perform when event is hit\n"
5538 "\t Format: <trigger>[:count][if <filter>]\n"
5539 "\t trigger: traceon, traceoff\n"
5540 "\t enable_event:<system>:<event>\n"
5541 "\t disable_event:<system>:<event>\n"
5542 #ifdef CONFIG_HIST_TRIGGERS
5543 "\t enable_hist:<system>:<event>\n"
5544 "\t disable_hist:<system>:<event>\n"
5546 #ifdef CONFIG_STACKTRACE
5549 #ifdef CONFIG_TRACER_SNAPSHOT
5552 #ifdef CONFIG_HIST_TRIGGERS
5553 "\t\t hist (see below)\n"
5555 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5556 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5557 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5558 "\t events/block/block_unplug/trigger\n"
5559 "\t The first disables tracing every time block_unplug is hit.\n"
5560 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5561 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5562 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5563 "\t Like function triggers, the counter is only decremented if it\n"
5564 "\t enabled or disabled tracing.\n"
5565 "\t To remove a trigger without a count:\n"
5566 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5567 "\t To remove a trigger with a count:\n"
5568 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5569 "\t Filters can be ignored when removing a trigger.\n"
5570 #ifdef CONFIG_HIST_TRIGGERS
5571 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5572 "\t Format: hist:keys=<field1[,field2,...]>\n"
5573 "\t [:values=<field1[,field2,...]>]\n"
5574 "\t [:sort=<field1[,field2,...]>]\n"
5575 "\t [:size=#entries]\n"
5576 "\t [:pause][:continue][:clear]\n"
5577 "\t [:name=histname1]\n"
5578 "\t [:<handler>.<action>]\n"
5579 "\t [if <filter>]\n\n"
5580 "\t Note, special fields can be used as well:\n"
5581 "\t common_timestamp - to record current timestamp\n"
5582 "\t common_cpu - to record the CPU the event happened on\n"
5584 "\t When a matching event is hit, an entry is added to a hash\n"
5585 "\t table using the key(s) and value(s) named, and the value of a\n"
5586 "\t sum called 'hitcount' is incremented. Keys and values\n"
5587 "\t correspond to fields in the event's format description. Keys\n"
5588 "\t can be any field, or the special string 'stacktrace'.\n"
5589 "\t Compound keys consisting of up to two fields can be specified\n"
5590 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5591 "\t fields. Sort keys consisting of up to two fields can be\n"
5592 "\t specified using the 'sort' keyword. The sort direction can\n"
5593 "\t be modified by appending '.descending' or '.ascending' to a\n"
5594 "\t sort field. The 'size' parameter can be used to specify more\n"
5595 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5596 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5597 "\t its histogram data will be shared with other triggers of the\n"
5598 "\t same name, and trigger hits will update this common data.\n\n"
5599 "\t Reading the 'hist' file for the event will dump the hash\n"
5600 "\t table in its entirety to stdout. If there are multiple hist\n"
5601 "\t triggers attached to an event, there will be a table for each\n"
5602 "\t trigger in the output. The table displayed for a named\n"
5603 "\t trigger will be the same as any other instance having the\n"
5604 "\t same name. The default format used to display a given field\n"
5605 "\t can be modified by appending any of the following modifiers\n"
5606 "\t to the field name, as applicable:\n\n"
5607 "\t .hex display a number as a hex value\n"
5608 "\t .sym display an address as a symbol\n"
5609 "\t .sym-offset display an address as a symbol and offset\n"
5610 "\t .execname display a common_pid as a program name\n"
5611 "\t .syscall display a syscall id as a syscall name\n"
5612 "\t .log2 display log2 value rather than raw number\n"
5613 "\t .usecs display a common_timestamp in microseconds\n\n"
5614 "\t The 'pause' parameter can be used to pause an existing hist\n"
5615 "\t trigger or to start a hist trigger but not log any events\n"
5616 "\t until told to do so. 'continue' can be used to start or\n"
5617 "\t restart a paused hist trigger.\n\n"
5618 "\t The 'clear' parameter will clear the contents of a running\n"
5619 "\t hist trigger and leave its current paused/active state\n"
5621 "\t The enable_hist and disable_hist triggers can be used to\n"
5622 "\t have one event conditionally start and stop another event's\n"
5623 "\t already-attached hist trigger. The syntax is analogous to\n"
5624 "\t the enable_event and disable_event triggers.\n\n"
5625 "\t Hist trigger handlers and actions are executed whenever a\n"
5626 "\t a histogram entry is added or updated. They take the form:\n\n"
5627 "\t <handler>.<action>\n\n"
5628 "\t The available handlers are:\n\n"
5629 "\t onmatch(matching.event) - invoke on addition or update\n"
5630 "\t onmax(var) - invoke if var exceeds current max\n"
5631 "\t onchange(var) - invoke action if var changes\n\n"
5632 "\t The available actions are:\n\n"
5633 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5634 "\t save(field,...) - save current event fields\n"
5635 #ifdef CONFIG_TRACER_SNAPSHOT
5636 "\t snapshot() - snapshot the trace buffer\n\n"
5638 #ifdef CONFIG_SYNTH_EVENTS
5639 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5640 "\t Write into this file to define/undefine new synthetic events.\n"
5641 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5647 tracing_readme_read(struct file *filp, char __user *ubuf,
5648 size_t cnt, loff_t *ppos)
5650 return simple_read_from_buffer(ubuf, cnt, ppos,
5651 readme_msg, strlen(readme_msg));
5654 static const struct file_operations tracing_readme_fops = {
5655 .open = tracing_open_generic,
5656 .read = tracing_readme_read,
5657 .llseek = generic_file_llseek,
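/*
 * seq_file interface for the saved_tgids file: each position corresponds
 * to a PID, and the show callback prints "<pid> <tgid>" for every PID
 * that has a thread group id recorded in tgid_map.
 */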
5660 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5664 return trace_find_tgid_ptr(pid);
5667 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5671 return trace_find_tgid_ptr(pid);
5674 static void saved_tgids_stop(struct seq_file *m, void *v)
5678 static int saved_tgids_show(struct seq_file *m, void *v)
5680 int *entry = (int *)v;
5681 int pid = entry - tgid_map;
5687 seq_printf(m, "%d %d\n", pid, tgid);
5691 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5692 .start = saved_tgids_start,
5693 .stop = saved_tgids_stop,
5694 .next = saved_tgids_next,
5695 .show = saved_tgids_show,
5698 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5702 ret = tracing_check_open_get_tr(NULL);
5706 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5710 static const struct file_operations tracing_saved_tgids_fops = {
5711 .open = tracing_saved_tgids_open,
5713 .llseek = seq_lseek,
5714 .release = seq_release,
5717 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5719 unsigned int *ptr = v;
5721 if (*pos || m->count)
5726 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5728 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5737 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5743 arch_spin_lock(&trace_cmdline_lock);
5745 v = &savedcmd->map_cmdline_to_pid[0];
5747 v = saved_cmdlines_next(m, v, &l);
5755 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5757 arch_spin_unlock(&trace_cmdline_lock);
5761 static int saved_cmdlines_show(struct seq_file *m, void *v)
5763 char buf[TASK_COMM_LEN];
5764 unsigned int *pid = v;
5766 __trace_find_cmdline(*pid, buf);
5767 seq_printf(m, "%d %s\n", *pid, buf);
5771 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5772 .start = saved_cmdlines_start,
5773 .next = saved_cmdlines_next,
5774 .stop = saved_cmdlines_stop,
5775 .show = saved_cmdlines_show,
5778 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5782 ret = tracing_check_open_get_tr(NULL);
5786 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5789 static const struct file_operations tracing_saved_cmdlines_fops = {
5790 .open = tracing_saved_cmdlines_open,
5792 .llseek = seq_lseek,
5793 .release = seq_release,
5797 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5798 size_t cnt, loff_t *ppos)
5803 arch_spin_lock(&trace_cmdline_lock);
5804 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5805 arch_spin_unlock(&trace_cmdline_lock);
5807 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
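/*
 * Resizing saved_cmdlines: a new buffer is allocated first, then swapped
 * in under trace_cmdline_lock, and the old buffer is freed only after
 * the swap so readers never see a half-initialized table.
 */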
5810 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5812 kfree(s->saved_cmdlines);
5813 kfree(s->map_cmdline_to_pid);
5817 static int tracing_resize_saved_cmdlines(unsigned int val)
5819 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5821 s = kmalloc(sizeof(*s), GFP_KERNEL);
5825 if (allocate_cmdlines_buffer(val, s) < 0) {
5830 arch_spin_lock(&trace_cmdline_lock);
5831 savedcmd_temp = savedcmd;
5833 arch_spin_unlock(&trace_cmdline_lock);
5834 free_saved_cmdlines_buffer(savedcmd_temp);
5840 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5841 size_t cnt, loff_t *ppos)
5846 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
5851 if (!val || val > PID_MAX_DEFAULT)
5854 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5863 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5864 .open = tracing_open_generic,
5865 .read = tracing_saved_cmdlines_size_read,
5866 .write = tracing_saved_cmdlines_size_write,
5869 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5870 static union trace_eval_map_item *
5871 update_eval_map(union trace_eval_map_item *ptr)
5873 if (!ptr->map.eval_string) {
5874 if (ptr->tail.next) {
5875 ptr = ptr->tail.next;
5876 /* Set ptr to the next real item (skip head) */
5884 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5886 union trace_eval_map_item *ptr = v;
5889 * Paranoid! If ptr points to end, we don't want to increment past it.
5890 * This really should never happen.
5893 ptr = update_eval_map(ptr);
5894 if (WARN_ON_ONCE(!ptr))
5898 ptr = update_eval_map(ptr);
5903 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5905 union trace_eval_map_item *v;
5908 mutex_lock(&trace_eval_mutex);
5910 v = trace_eval_maps;
5914 while (v && l < *pos) {
5915 v = eval_map_next(m, v, &l);
5921 static void eval_map_stop(struct seq_file *m, void *v)
5923 mutex_unlock(&trace_eval_mutex);
5926 static int eval_map_show(struct seq_file *m, void *v)
5928 union trace_eval_map_item *ptr = v;
5930 seq_printf(m, "%s %ld (%s)\n",
5931 ptr->map.eval_string, ptr->map.eval_value,
5937 static const struct seq_operations tracing_eval_map_seq_ops = {
5938 .start = eval_map_start,
5939 .next = eval_map_next,
5940 .stop = eval_map_stop,
5941 .show = eval_map_show,
5944 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5948 ret = tracing_check_open_get_tr(NULL);
5952 return seq_open(filp, &tracing_eval_map_seq_ops);
5955 static const struct file_operations tracing_eval_map_fops = {
5956 .open = tracing_eval_map_open,
5958 .llseek = seq_lseek,
5959 .release = seq_release,
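/*
 * Layout of a trace_eval_map_item array as built below (sketch):
 *
 *   [ head: mod, length ] [ map 0 ] ... [ map len-1 ] [ tail: next ]
 *
 * trace_eval_jmp_to_tail() skips from the head over the maps to the
 * tail item, which links to the next module's array.
 */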
5962 static inline union trace_eval_map_item *
5963 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5965 /* Return tail of array given the head */
5966 return ptr + ptr->head.length + 1;
5970 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5973 struct trace_eval_map **stop;
5974 struct trace_eval_map **map;
5975 union trace_eval_map_item *map_array;
5976 union trace_eval_map_item *ptr;
5981 * The trace_eval_maps contains the map plus a head and tail item,
5982 * where the head holds the module and length of array, and the
5983 * tail holds a pointer to the next list.
5985 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5987 pr_warn("Unable to allocate trace eval mapping\n");
5991 mutex_lock(&trace_eval_mutex);
5993 if (!trace_eval_maps)
5994 trace_eval_maps = map_array;
5996 ptr = trace_eval_maps;
5998 ptr = trace_eval_jmp_to_tail(ptr);
5999 if (!ptr->tail.next)
6001 ptr = ptr->tail.next;
6004 ptr->tail.next = map_array;
6006 map_array->head.mod = mod;
6007 map_array->head.length = len;
6010 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6011 map_array->map = **map;
6014 memset(map_array, 0, sizeof(*map_array));
6016 mutex_unlock(&trace_eval_mutex);
6019 static void trace_create_eval_file(struct dentry *d_tracer)
6021 trace_create_file("eval_map", 0444, d_tracer,
6022 NULL, &tracing_eval_map_fops);
6025 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6026 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6027 static inline void trace_insert_eval_map_file(struct module *mod,
6028 struct trace_eval_map **start, int len) { }
6029 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6031 static void trace_insert_eval_map(struct module *mod,
6032 struct trace_eval_map **start, int len)
6034 struct trace_eval_map **map;
6041 trace_event_eval_update(map, len);
6043 trace_insert_eval_map_file(mod, start, len);
6047 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6048 size_t cnt, loff_t *ppos)
6050 struct trace_array *tr = filp->private_data;
6051 char buf[MAX_TRACER_SIZE+2];
6054 mutex_lock(&trace_types_lock);
6055 r = sprintf(buf, "%s\n", tr->current_trace->name);
6056 mutex_unlock(&trace_types_lock);
6058 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6061 int tracer_init(struct tracer *t, struct trace_array *tr)
6063 tracing_reset_online_cpus(&tr->array_buffer);
6067 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6071 for_each_tracing_cpu(cpu)
6072 per_cpu_ptr(buf->data, cpu)->entries = val;
6075 #ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's buffer to the size of @size_buf's entries */
6077 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6078 struct array_buffer *size_buf, int cpu_id)
6082 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6083 for_each_tracing_cpu(cpu) {
6084 ret = ring_buffer_resize(trace_buf->buffer,
6085 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6088 per_cpu_ptr(trace_buf->data, cpu)->entries =
6089 per_cpu_ptr(size_buf->data, cpu)->entries;
6092 ret = ring_buffer_resize(trace_buf->buffer,
6093 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6095 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6096 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6101 #endif /* CONFIG_TRACER_MAX_TRACE */
6103 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6104 unsigned long size, int cpu)
6109 * If kernel or user changes the size of the ring buffer
6110 * we use the size that was given, and we can forget about
6111 * expanding it later.
6113 ring_buffer_expanded = true;
6115 /* May be called before buffers are initialized */
6116 if (!tr->array_buffer.buffer)
6119 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6123 #ifdef CONFIG_TRACER_MAX_TRACE
6124 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6125 !tr->current_trace->use_max_tr)
6128 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6130 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6131 &tr->array_buffer, cpu);
6134 * AARGH! We are left with different
6135 * size max buffer!!!!
6136 * The max buffer is our "snapshot" buffer.
6137 * When a tracer needs a snapshot (one of the
6138 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in updating
			 * the size of the main buffer, but failed to
6141 * update the size of the max buffer. But when we tried
6142 * to reset the main buffer to the original size, we
6143 * failed there too. This is very unlikely to
6144 * happen, but if it does, warn and kill all
6148 tracing_disabled = 1;
	if (cpu == RING_BUFFER_ALL_CPUS)
		set_buffer_entries(&tr->max_buffer, size);
	else
		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6159 #endif /* CONFIG_TRACER_MAX_TRACE */
	if (cpu == RING_BUFFER_ALL_CPUS)
		set_buffer_entries(&tr->array_buffer, size);
	else
		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6169 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6170 unsigned long size, int cpu_id)
6174 mutex_lock(&trace_types_lock);
6176 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6177 /* make sure, this cpu is enabled in the mask */
6178 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6184 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6189 mutex_unlock(&trace_types_lock);
6196 * tracing_update_buffers - used by tracing facility to expand ring buffers
 * To save memory when tracing is never used on a system that has it
 * configured in, the ring buffers are set to a minimum size. Once a
 * user starts to use the tracing facility, they need to grow to their
 * default size.
6203 * This function is to be called when a tracer is about to be used.
6205 int tracing_update_buffers(void)
6209 mutex_lock(&trace_types_lock);
6210 if (!ring_buffer_expanded)
6211 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6212 RING_BUFFER_ALL_CPUS);
6213 mutex_unlock(&trace_types_lock);
6218 struct trace_option_dentry;
6221 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6224 * Used to clear out the tracer before deletion of an instance.
6225 * Must have trace_types_lock held.
6227 static void tracing_set_nop(struct trace_array *tr)
6229 if (tr->current_trace == &nop_trace)
6232 tr->current_trace->enabled--;
6234 if (tr->current_trace->reset)
6235 tr->current_trace->reset(tr);
6237 tr->current_trace = &nop_trace;
6240 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6242 /* Only enable if the directory has been created already. */
6246 create_trace_option_files(tr, t);
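/*
 * Switching tracers: the current tracer is disabled and reset,
 * current_trace is pointed at nop_trace while the buffers (including
 * the max/snapshot buffer if the new tracer needs one) are
 * reconfigured, and only then is the new tracer initialized and
 * enabled again.
 */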
6249 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6252 #ifdef CONFIG_TRACER_MAX_TRACE
6257 mutex_lock(&trace_types_lock);
6259 if (!ring_buffer_expanded) {
6260 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6261 RING_BUFFER_ALL_CPUS);
6267 for (t = trace_types; t; t = t->next) {
6268 if (strcmp(t->name, buf) == 0)
6275 if (t == tr->current_trace)
6278 #ifdef CONFIG_TRACER_SNAPSHOT
6279 if (t->use_max_tr) {
6280 arch_spin_lock(&tr->max_lock);
6281 if (tr->cond_snapshot)
6283 arch_spin_unlock(&tr->max_lock);
6288 /* Some tracers won't work on kernel command line */
6289 if (system_state < SYSTEM_RUNNING && t->noboot) {
6290 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6295 /* Some tracers are only allowed for the top level buffer */
6296 if (!trace_ok_for_array(t, tr)) {
6301 /* If trace pipe files are being read, we can't change the tracer */
6302 if (tr->trace_ref) {
6307 trace_branch_disable();
6309 tr->current_trace->enabled--;
6311 if (tr->current_trace->reset)
6312 tr->current_trace->reset(tr);
6314 /* Current trace needs to be nop_trace before synchronize_rcu */
6315 tr->current_trace = &nop_trace;
6317 #ifdef CONFIG_TRACER_MAX_TRACE
6318 had_max_tr = tr->allocated_snapshot;
6320 if (had_max_tr && !t->use_max_tr) {
6322 * We need to make sure that the update_max_tr sees that
6323 * current_trace changed to nop_trace to keep it from
6324 * swapping the buffers after we resize it.
6325 * The update_max_tr is called from interrupts disabled
		 * so a synchronize_rcu() is sufficient.
6333 #ifdef CONFIG_TRACER_MAX_TRACE
6334 if (t->use_max_tr && !had_max_tr) {
6335 ret = tracing_alloc_snapshot_instance(tr);
6342 ret = tracer_init(t, tr);
6347 tr->current_trace = t;
6348 tr->current_trace->enabled++;
6349 trace_branch_enable(tr);
6351 mutex_unlock(&trace_types_lock);
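/*
 * Illustrative use of the current_tracer file from user space (not part
 * of the original source; assumes the usual tracefs mount point):
 *
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 */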
6357 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6358 size_t cnt, loff_t *ppos)
6360 struct trace_array *tr = filp->private_data;
6361 char buf[MAX_TRACER_SIZE+1];
6368 if (cnt > MAX_TRACER_SIZE)
6369 cnt = MAX_TRACER_SIZE;
6371 if (copy_from_user(buf, ubuf, cnt))
6376 /* strip ending whitespace. */
6377 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6380 err = tracing_set_tracer(tr, buf);
6390 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6391 size_t cnt, loff_t *ppos)
6396 r = snprintf(buf, sizeof(buf), "%ld\n",
6397 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6398 if (r > sizeof(buf))
6400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6404 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6405 size_t cnt, loff_t *ppos)
6410 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6420 tracing_thresh_read(struct file *filp, char __user *ubuf,
6421 size_t cnt, loff_t *ppos)
6423 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6427 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6428 size_t cnt, loff_t *ppos)
6430 struct trace_array *tr = filp->private_data;
6433 mutex_lock(&trace_types_lock);
6434 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6438 if (tr->current_trace->update_thresh) {
6439 ret = tr->current_trace->update_thresh(tr);
6446 mutex_unlock(&trace_types_lock);
6451 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6454 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6455 size_t cnt, loff_t *ppos)
6457 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6461 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6462 size_t cnt, loff_t *ppos)
6464 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
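/*
 * trace_pipe: a consuming, blocking read interface to the ring buffer.
 * Each reader gets its own iterator, and events returned by a read are
 * consumed, unlike the non-consuming "trace" file.
 */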
6469 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6471 struct trace_array *tr = inode->i_private;
6472 struct trace_iterator *iter;
6475 ret = tracing_check_open_get_tr(tr);
6479 mutex_lock(&trace_types_lock);
6481 /* create a buffer to store the information to pass to userspace */
6482 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6485 __trace_array_put(tr);
6489 trace_seq_init(&iter->seq);
6490 iter->trace = tr->current_trace;
6492 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6497 /* trace pipe does not show start of buffer */
6498 cpumask_setall(iter->started);
6500 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6501 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6503 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6504 if (trace_clocks[tr->clock_id].in_ns)
6505 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6508 iter->array_buffer = &tr->array_buffer;
6509 iter->cpu_file = tracing_get_cpu(inode);
6510 mutex_init(&iter->mutex);
6511 filp->private_data = iter;
6513 if (iter->trace->pipe_open)
6514 iter->trace->pipe_open(iter);
6516 nonseekable_open(inode, filp);
6520 mutex_unlock(&trace_types_lock);
6525 __trace_array_put(tr);
6526 mutex_unlock(&trace_types_lock);
6530 static int tracing_release_pipe(struct inode *inode, struct file *file)
6532 struct trace_iterator *iter = file->private_data;
6533 struct trace_array *tr = inode->i_private;
6535 mutex_lock(&trace_types_lock);
6539 if (iter->trace->pipe_close)
6540 iter->trace->pipe_close(iter);
6542 mutex_unlock(&trace_types_lock);
6544 free_cpumask_var(iter->started);
6545 mutex_destroy(&iter->mutex);
6548 trace_array_put(tr);
6554 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6556 struct trace_array *tr = iter->tr;
	/* Iterators are static; they should be either filled or empty */
6559 if (trace_buffer_iter(iter, iter->cpu_file))
6560 return EPOLLIN | EPOLLRDNORM;
6562 if (tr->trace_flags & TRACE_ITER_BLOCK)
6564 * Always select as readable when in blocking mode
6566 return EPOLLIN | EPOLLRDNORM;
6568 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6573 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6575 struct trace_iterator *iter = filp->private_data;
6577 return trace_poll(iter, filp, poll_table);
6580 /* Must be called with iter->mutex held. */
6581 static int tracing_wait_pipe(struct file *filp)
6583 struct trace_iterator *iter = filp->private_data;
6586 while (trace_empty(iter)) {
6588 if ((filp->f_flags & O_NONBLOCK)) {
6593 * We block until we read something and tracing is disabled.
6594 * We still block if tracing is disabled, but we have never
6595 * read anything. This allows a user to cat this file, and
6596 * then enable tracing. But after we have read something,
6597 * we give an EOF when tracing is again disabled.
6599 * iter->pos will be 0 if we haven't read anything.
6601 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6604 mutex_unlock(&iter->mutex);
6606 ret = wait_on_pipe(iter, 0);
6608 mutex_lock(&iter->mutex);
6621 tracing_read_pipe(struct file *filp, char __user *ubuf,
6622 size_t cnt, loff_t *ppos)
6624 struct trace_iterator *iter = filp->private_data;
	 * Avoid more than one consumer on a single file descriptor.
	 * This is just a matter of trace coherency; the ring buffer itself
6632 mutex_lock(&iter->mutex);
6634 /* return any leftover data */
6635 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6639 trace_seq_init(&iter->seq);
6641 if (iter->trace->read) {
6642 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6648 sret = tracing_wait_pipe(filp);
6652 /* stop when tracing is finished */
6653 if (trace_empty(iter)) {
6658 if (cnt >= PAGE_SIZE)
6659 cnt = PAGE_SIZE - 1;
6661 /* reset all but tr, trace, and overruns */
6662 memset(&iter->seq, 0,
6663 sizeof(struct trace_iterator) -
6664 offsetof(struct trace_iterator, seq));
6665 cpumask_clear(iter->started);
6666 trace_seq_init(&iter->seq);
6669 trace_event_read_lock();
6670 trace_access_lock(iter->cpu_file);
6671 while (trace_find_next_entry_inc(iter) != NULL) {
6672 enum print_line_t ret;
6673 int save_len = iter->seq.seq.len;
6675 ret = print_trace_line(iter);
6676 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6677 /* don't print partial lines */
6678 iter->seq.seq.len = save_len;
6681 if (ret != TRACE_TYPE_NO_CONSUME)
6682 trace_consume(iter);
6684 if (trace_seq_used(&iter->seq) >= cnt)
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and should have exited via the partial-line condition above.
6690 * One of the trace_seq_* functions is not used properly.
6692 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6695 trace_access_unlock(iter->cpu_file);
6696 trace_event_read_unlock();
6698 /* Now copy what we have to the user */
6699 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6700 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6701 trace_seq_init(&iter->seq);
6704 * If there was nothing to send to user, in spite of consuming trace
6705 * entries, go back to wait for more entries.
6711 mutex_unlock(&iter->mutex);
6716 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6719 __free_page(spd->pages[idx]);
6723 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6729 /* Seq buffer is page-sized, exactly what we need. */
6731 save_len = iter->seq.seq.len;
6732 ret = print_trace_line(iter);
6734 if (trace_seq_has_overflowed(&iter->seq)) {
6735 iter->seq.seq.len = save_len;
6740 * This should not be hit, because it should only
6741 * be set if the iter->seq overflowed. But check it
6742 * anyway to be safe.
6744 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6745 iter->seq.seq.len = save_len;
6749 count = trace_seq_used(&iter->seq) - save_len;
6752 iter->seq.seq.len = save_len;
6756 if (ret != TRACE_TYPE_NO_CONSUME)
6757 trace_consume(iter);
6759 if (!trace_find_next_entry_inc(iter)) {
6769 static ssize_t tracing_splice_read_pipe(struct file *filp,
6771 struct pipe_inode_info *pipe,
6775 struct page *pages_def[PIPE_DEF_BUFFERS];
6776 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6777 struct trace_iterator *iter = filp->private_data;
6778 struct splice_pipe_desc spd = {
6780 .partial = partial_def,
6781 .nr_pages = 0, /* This gets updated below. */
6782 .nr_pages_max = PIPE_DEF_BUFFERS,
6783 .ops = &default_pipe_buf_ops,
6784 .spd_release = tracing_spd_release_pipe,
6790 if (splice_grow_spd(pipe, &spd))
6793 mutex_lock(&iter->mutex);
6795 if (iter->trace->splice_read) {
6796 ret = iter->trace->splice_read(iter, filp,
6797 ppos, pipe, len, flags);
6802 ret = tracing_wait_pipe(filp);
6806 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6811 trace_event_read_lock();
6812 trace_access_lock(iter->cpu_file);
6814 /* Fill as many pages as possible. */
6815 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6816 spd.pages[i] = alloc_page(GFP_KERNEL);
6820 rem = tracing_fill_pipe_page(rem, iter);
6822 /* Copy the data into the page, so we can start over. */
6823 ret = trace_seq_to_buffer(&iter->seq,
6824 page_address(spd.pages[i]),
6825 trace_seq_used(&iter->seq));
6827 __free_page(spd.pages[i]);
6830 spd.partial[i].offset = 0;
6831 spd.partial[i].len = trace_seq_used(&iter->seq);
6833 trace_seq_init(&iter->seq);
6836 trace_access_unlock(iter->cpu_file);
6837 trace_event_read_unlock();
6838 mutex_unlock(&iter->mutex);
6843 ret = splice_to_pipe(pipe, &spd);
6847 splice_shrink_spd(&spd);
6851 mutex_unlock(&iter->mutex);
6856 tracing_entries_read(struct file *filp, char __user *ubuf,
6857 size_t cnt, loff_t *ppos)
6859 struct inode *inode = file_inode(filp);
6860 struct trace_array *tr = inode->i_private;
6861 int cpu = tracing_get_cpu(inode);
6866 mutex_lock(&trace_types_lock);
6868 if (cpu == RING_BUFFER_ALL_CPUS) {
6869 int cpu, buf_size_same;
6874 /* check if all cpu sizes are same */
6875 for_each_tracing_cpu(cpu) {
6876 /* fill in the size from first enabled cpu */
6878 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6879 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6885 if (buf_size_same) {
			if (!ring_buffer_expanded)
				r = sprintf(buf, "%lu (expanded: %lu)\n",
					    size >> 10,
					    trace_buf_size >> 10);
			else
				r = sprintf(buf, "%lu\n", size >> 10);
6893 r = sprintf(buf, "X\n");
6895 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6897 mutex_unlock(&trace_types_lock);
6899 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6904 tracing_entries_write(struct file *filp, const char __user *ubuf,
6905 size_t cnt, loff_t *ppos)
6907 struct inode *inode = file_inode(filp);
6908 struct trace_array *tr = inode->i_private;
6912 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6916 /* must have at least 1 entry */
6920 /* value is in KB */
6922 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6932 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6933 size_t cnt, loff_t *ppos)
6935 struct trace_array *tr = filp->private_data;
6938 unsigned long size = 0, expanded_size = 0;
6940 mutex_lock(&trace_types_lock);
6941 for_each_tracing_cpu(cpu) {
6942 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6943 if (!ring_buffer_expanded)
6944 expanded_size += trace_buf_size >> 10;
	if (ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6950 mutex_unlock(&trace_types_lock);
6952 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6956 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6957 size_t cnt, loff_t *ppos)
	 * There is no need to read what the user has written; this function
	 * just makes sure that there is no error when "echo" is used
6970 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6972 struct trace_array *tr = inode->i_private;
6974 /* disable tracing ? */
6975 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6976 tracer_tracing_off(tr);
6977 /* resize the ring buffer to 0 */
6978 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6980 trace_array_put(tr);
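/*
 * trace_marker: strings written here from user space are recorded as
 * TRACE_PRINT events in the ring buffer. Illustrative example (assumes
 * the usual tracefs mount point):
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */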
6986 tracing_mark_write(struct file *filp, const char __user *ubuf,
6987 size_t cnt, loff_t *fpos)
6989 struct trace_array *tr = filp->private_data;
6990 struct ring_buffer_event *event;
6991 enum event_trigger_type tt = ETT_NONE;
6992 struct trace_buffer *buffer;
6993 struct print_entry *entry;
6998 /* Used in tracing_mark_raw_write() as well */
6999 #define FAULTED_STR "<faulted>"
7000 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7002 if (tracing_disabled)
7005 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7008 if (cnt > TRACE_BUF_SIZE)
7009 cnt = TRACE_BUF_SIZE;
7011 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7013 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7015 /* If less than "<faulted>", then make sure we can still add that */
7016 if (cnt < FAULTED_SIZE)
7017 size += FAULTED_SIZE - cnt;
7019 buffer = tr->array_buffer.buffer;
7020 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7022 if (unlikely(!event))
7023 /* Ring buffer disabled, return as if not open for write */
7026 entry = ring_buffer_event_data(event);
7027 entry->ip = _THIS_IP_;
7029 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7031 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7037 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7038 /* do not add \n before testing triggers, but add \0 */
7039 entry->buf[cnt] = '\0';
7040 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7043 if (entry->buf[cnt - 1] != '\n') {
7044 entry->buf[cnt] = '\n';
7045 entry->buf[cnt + 1] = '\0';
7047 entry->buf[cnt] = '\0';
7049 if (static_branch_unlikely(&trace_marker_exports_enabled))
7050 ftrace_exports(event, TRACE_EXPORT_MARKER);
7051 __buffer_unlock_commit(buffer, event);
7054 event_triggers_post_call(tr->trace_marker_file, tt);
7062 /* Limit it for now to 3K (including tag) */
7063 #define RAW_DATA_MAX_SIZE (1024*3)
7066 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7067 size_t cnt, loff_t *fpos)
7069 struct trace_array *tr = filp->private_data;
7070 struct ring_buffer_event *event;
7071 struct trace_buffer *buffer;
7072 struct raw_data_entry *entry;
7077 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7079 if (tracing_disabled)
7082 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7085 /* The marker must at least have a tag id */
7086 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7089 if (cnt > TRACE_BUF_SIZE)
7090 cnt = TRACE_BUF_SIZE;
7092 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7094 size = sizeof(*entry) + cnt;
7095 if (cnt < FAULT_SIZE_ID)
7096 size += FAULT_SIZE_ID - cnt;
7098 buffer = tr->array_buffer.buffer;
7099 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7102 /* Ring buffer disabled, return as if not open for write */
7105 entry = ring_buffer_event_data(event);
7107 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7110 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7115 __buffer_unlock_commit(buffer, event);
7123 static int tracing_clock_show(struct seq_file *m, void *v)
7125 struct trace_array *tr = m->private;
7128 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7130 "%s%s%s%s", i ? " " : "",
7131 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7132 i == tr->clock_id ? "]" : "");
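/*
 * Illustrative use of the trace_clock file (assumes the usual tracefs
 * mount point); the currently selected clock is shown in brackets:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   # echo global > /sys/kernel/tracing/trace_clock
 */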
7138 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7142 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7143 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7146 if (i == ARRAY_SIZE(trace_clocks))
7149 mutex_lock(&trace_types_lock);
7153 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7156 * New clock may not be consistent with the previous clock.
7157 * Reset the buffer so that it doesn't have incomparable timestamps.
7159 tracing_reset_online_cpus(&tr->array_buffer);
7161 #ifdef CONFIG_TRACER_MAX_TRACE
7162 if (tr->max_buffer.buffer)
7163 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7164 tracing_reset_online_cpus(&tr->max_buffer);
7167 mutex_unlock(&trace_types_lock);
7172 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7173 size_t cnt, loff_t *fpos)
7175 struct seq_file *m = filp->private_data;
7176 struct trace_array *tr = m->private;
7178 const char *clockstr;
7181 if (cnt >= sizeof(buf))
7184 if (copy_from_user(buf, ubuf, cnt))
7189 clockstr = strstrip(buf);
7191 ret = tracing_set_clock(tr, clockstr);
7200 static int tracing_clock_open(struct inode *inode, struct file *file)
7202 struct trace_array *tr = inode->i_private;
7205 ret = tracing_check_open_get_tr(tr);
7209 ret = single_open(file, tracing_clock_show, inode->i_private);
7211 trace_array_put(tr);
7216 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7218 struct trace_array *tr = m->private;
7220 mutex_lock(&trace_types_lock);
7222 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7223 seq_puts(m, "delta [absolute]\n");
7225 seq_puts(m, "[delta] absolute\n");
7227 mutex_unlock(&trace_types_lock);
7232 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7234 struct trace_array *tr = inode->i_private;
7237 ret = tracing_check_open_get_tr(tr);
7241 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7243 trace_array_put(tr);
7248 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7250 if (rbe == this_cpu_read(trace_buffered_event))
7251 return ring_buffer_time_stamp(buffer);
7253 return ring_buffer_event_time_stamp(buffer, rbe);
 * Enable or disable use of the per-CPU trace_buffered_event when possible.
7259 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7263 mutex_lock(&trace_types_lock);
7265 if (set && tr->no_filter_buffering_ref++)
7269 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7274 --tr->no_filter_buffering_ref;
7277 mutex_unlock(&trace_types_lock);
7282 struct ftrace_buffer_info {
7283 struct trace_iterator iter;
7285 unsigned int spare_cpu;
7289 #ifdef CONFIG_TRACER_SNAPSHOT
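/*
 * Illustrative use of the snapshot file (assumes the usual tracefs mount
 * point): writing 1 allocates the snapshot buffer if needed and takes a
 * snapshot, writing 0 frees it, and reading shows the snapshotted buffer:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot
 *   # cat /sys/kernel/tracing/snapshot
 *   # echo 0 > /sys/kernel/tracing/snapshot
 */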
7290 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7292 struct trace_array *tr = inode->i_private;
7293 struct trace_iterator *iter;
7297 ret = tracing_check_open_get_tr(tr);
7301 if (file->f_mode & FMODE_READ) {
7302 iter = __tracing_open(inode, file, true);
7304 ret = PTR_ERR(iter);
7306 /* Writes still need the seq_file to hold the private data */
7308 m = kzalloc(sizeof(*m), GFP_KERNEL);
7311 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7319 iter->array_buffer = &tr->max_buffer;
7320 iter->cpu_file = tracing_get_cpu(inode);
7322 file->private_data = m;
7326 trace_array_put(tr);
7332 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7335 struct seq_file *m = filp->private_data;
7336 struct trace_iterator *iter = m->private;
7337 struct trace_array *tr = iter->tr;
7341 ret = tracing_update_buffers();
7345 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7349 mutex_lock(&trace_types_lock);
7351 if (tr->current_trace->use_max_tr) {
7356 arch_spin_lock(&tr->max_lock);
7357 if (tr->cond_snapshot)
7359 arch_spin_unlock(&tr->max_lock);
7365 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7369 if (tr->allocated_snapshot)
7373 /* Only allow per-cpu swap if the ring buffer supports it */
7374 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7375 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7380 if (tr->allocated_snapshot)
7381 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7382 &tr->array_buffer, iter->cpu_file);
7384 ret = tracing_alloc_snapshot_instance(tr);
7387 local_irq_disable();
7388 /* Now, we're going to swap */
7389 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7390 update_max_tr(tr, current, smp_processor_id(), NULL);
7392 update_max_tr_single(tr, current, iter->cpu_file);
7396 if (tr->allocated_snapshot) {
7397 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7398 tracing_reset_online_cpus(&tr->max_buffer);
7400 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7410 mutex_unlock(&trace_types_lock);
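/*
 * For reference (see Documentation/trace/ftrace.rst), the values accepted by
 * the "snapshot" file handled above are:
 *   0 - free the snapshot buffer if it was allocated
 *   1 - allocate the buffer if needed and take a snapshot (swap the buffers)
 *   2 - clear the snapshot buffer without freeing it or taking a snapshot
 */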
7414 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7416 struct seq_file *m = file->private_data;
7419 ret = tracing_release(inode, file);
7421 if (file->f_mode & FMODE_READ)
7424 /* If write only, the seq_file is just a stub */
7432 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7433 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7434 size_t count, loff_t *ppos);
7435 static int tracing_buffers_release(struct inode *inode, struct file *file);
7436 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7437 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7439 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7441 struct ftrace_buffer_info *info;
7444 /* The following checks for tracefs lockdown */
7445 ret = tracing_buffers_open(inode, filp);
7449 info = filp->private_data;
7451 if (info->iter.trace->use_max_tr) {
7452 tracing_buffers_release(inode, filp);
7456 info->iter.snapshot = true;
7457 info->iter.array_buffer = &info->iter.tr->max_buffer;
7462 #endif /* CONFIG_TRACER_SNAPSHOT */
7465 static const struct file_operations tracing_thresh_fops = {
7466 .open = tracing_open_generic,
7467 .read = tracing_thresh_read,
7468 .write = tracing_thresh_write,
7469 .llseek = generic_file_llseek,
7472 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7473 static const struct file_operations tracing_max_lat_fops = {
7474 .open = tracing_open_generic,
7475 .read = tracing_max_lat_read,
7476 .write = tracing_max_lat_write,
7477 .llseek = generic_file_llseek,
7481 static const struct file_operations set_tracer_fops = {
7482 .open = tracing_open_generic,
7483 .read = tracing_set_trace_read,
7484 .write = tracing_set_trace_write,
7485 .llseek = generic_file_llseek,
7488 static const struct file_operations tracing_pipe_fops = {
7489 .open = tracing_open_pipe,
7490 .poll = tracing_poll_pipe,
7491 .read = tracing_read_pipe,
7492 .splice_read = tracing_splice_read_pipe,
7493 .release = tracing_release_pipe,
7494 .llseek = no_llseek,
7497 static const struct file_operations tracing_entries_fops = {
7498 .open = tracing_open_generic_tr,
7499 .read = tracing_entries_read,
7500 .write = tracing_entries_write,
7501 .llseek = generic_file_llseek,
7502 .release = tracing_release_generic_tr,
7505 static const struct file_operations tracing_total_entries_fops = {
7506 .open = tracing_open_generic_tr,
7507 .read = tracing_total_entries_read,
7508 .llseek = generic_file_llseek,
7509 .release = tracing_release_generic_tr,
7512 static const struct file_operations tracing_free_buffer_fops = {
7513 .open = tracing_open_generic_tr,
7514 .write = tracing_free_buffer_write,
7515 .release = tracing_free_buffer_release,
7518 static const struct file_operations tracing_mark_fops = {
7519 .open = tracing_open_generic_tr,
7520 .write = tracing_mark_write,
7521 .llseek = generic_file_llseek,
7522 .release = tracing_release_generic_tr,
7525 static const struct file_operations tracing_mark_raw_fops = {
7526 .open = tracing_open_generic_tr,
7527 .write = tracing_mark_raw_write,
7528 .llseek = generic_file_llseek,
7529 .release = tracing_release_generic_tr,
7532 static const struct file_operations trace_clock_fops = {
7533 .open = tracing_clock_open,
7535 .llseek = seq_lseek,
7536 .release = tracing_single_release_tr,
7537 .write = tracing_clock_write,
7540 static const struct file_operations trace_time_stamp_mode_fops = {
7541 .open = tracing_time_stamp_mode_open,
7543 .llseek = seq_lseek,
7544 .release = tracing_single_release_tr,
7547 #ifdef CONFIG_TRACER_SNAPSHOT
7548 static const struct file_operations snapshot_fops = {
7549 .open = tracing_snapshot_open,
7551 .write = tracing_snapshot_write,
7552 .llseek = tracing_lseek,
7553 .release = tracing_snapshot_release,
7556 static const struct file_operations snapshot_raw_fops = {
7557 .open = snapshot_raw_open,
7558 .read = tracing_buffers_read,
7559 .release = tracing_buffers_release,
7560 .splice_read = tracing_buffers_splice_read,
7561 .llseek = no_llseek,
7564 #endif /* CONFIG_TRACER_SNAPSHOT */
7566 #define TRACING_LOG_ERRS_MAX 8
7567 #define TRACING_LOG_LOC_MAX 128
7569 #define CMD_PREFIX " Command: "
7572 const char **errs; /* ptr to loc-specific array of err strings */
7573 u8 type; /* index into errs -> specific err string */
7574 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7578 struct tracing_log_err {
7579 struct list_head list;
7580 struct err_info info;
7581 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7582 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7585 static DEFINE_MUTEX(tracing_err_log_lock);
7587 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7589 struct tracing_log_err *err;
7591 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7592 err = kzalloc(sizeof(*err), GFP_KERNEL);
7594 err = ERR_PTR(-ENOMEM);
7595 tr->n_err_log_entries++;
7600 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7601 list_del(&err->list);
7607 * err_pos - find the position of a string within a command for error careting
7608 * @cmd: The tracing command that caused the error
7609 * @str: The string to position the caret at within @cmd
7611 * Finds the position of the first occurrence of @str within @cmd. The
7612 * return value can be passed to tracing_log_err() for caret placement
7615 * Returns the index within @cmd of the first occurrence of @str or 0
7616 * if @str was not found.
7618 unsigned int err_pos(char *cmd, const char *str)
7622 if (WARN_ON(!strlen(cmd)))
7625 found = strstr(cmd, str);
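/*
 * Example (illustrative): for cmd = "keys=pid,comm" and str = "comm",
 * err_pos() returns 9, the offset of "comm" within the command; that value
 * is then passed to tracing_log_err() as @pos so the caret is drawn under
 * the offending token. If @str does not occur in @cmd, 0 is returned and the
 * caret simply points at the start of the command.
 */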
7633 * tracing_log_err - write an error to the tracing error log
7634 * @tr: The associated trace array for the error (NULL for top level array)
7635 * @loc: A string describing where the error occurred
7636 * @cmd: The tracing command that caused the error
7637 * @errs: The array of loc-specific static error strings
7638 * @type: The index into errs[], which produces the specific static err string
7639 * @pos: The position the caret should be placed in the cmd
7641 * Writes an error into tracing/error_log of the form:
7643 * <loc>: error: <text>
7647 * tracing/error_log is a small log file containing the last
7648 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7649 * unless there has been a tracing error, and the error log can be
7650 * cleared and its memory freed by writing the empty string to it in
7651 * truncation mode, i.e. echo > tracing/error_log.
7653 * NOTE: the @errs array along with the @type param are used to
7654 * produce a static error string - this string is not copied and saved
7655 * when the error is logged - only a pointer to it is saved. See
7656 * existing callers for examples of how static strings are typically
7657 * defined for use with tracing_log_err().
7659 void tracing_log_err(struct trace_array *tr,
7660 const char *loc, const char *cmd,
7661 const char **errs, u8 type, u8 pos)
7663 struct tracing_log_err *err;
7668 mutex_lock(&tracing_err_log_lock);
7669 err = get_tracing_log_err(tr);
7670 if (PTR_ERR(err) == -ENOMEM) {
7671 mutex_unlock(&tracing_err_log_lock);
7675 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7676 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7678 err->info.errs = errs;
7679 err->info.type = type;
7680 err->info.pos = pos;
7681 err->info.ts = local_clock();
7683 list_add_tail(&err->list, &tr->err_log);
7684 mutex_unlock(&tracing_err_log_lock);
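/*
 * Minimal usage sketch with hypothetical names (real callers such as the
 * histogram trigger code define their static error strings the same way):
 *
 *	static const char *foo_errs[] = {
 *		"No error",
 *		"Duplicate field name",
 *	};
 *
 *	tracing_log_err(tr, "hist:sched:sched_switch", cmd,
 *			foo_errs, 1, err_pos(cmd, field));
 *
 * Only the foo_errs pointer is saved with the log entry, which is why the
 * array must have static storage duration.
 */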
7687 static void clear_tracing_err_log(struct trace_array *tr)
7689 struct tracing_log_err *err, *next;
7691 mutex_lock(&tracing_err_log_lock);
7692 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7693 list_del(&err->list);
7697 tr->n_err_log_entries = 0;
7698 mutex_unlock(&tracing_err_log_lock);
7701 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7703 struct trace_array *tr = m->private;
7705 mutex_lock(&tracing_err_log_lock);
7707 return seq_list_start(&tr->err_log, *pos);
7710 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7712 struct trace_array *tr = m->private;
7714 return seq_list_next(v, &tr->err_log, pos);
7717 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7719 mutex_unlock(&tracing_err_log_lock);
7722 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7726 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7728 for (i = 0; i < pos; i++)
7733 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7735 struct tracing_log_err *err = v;
7738 const char *err_text = err->info.errs[err->info.type];
7739 u64 sec = err->info.ts;
7742 nsec = do_div(sec, NSEC_PER_SEC);
7743 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7744 err->loc, err_text);
7745 seq_printf(m, "%s", err->cmd);
7746 tracing_err_log_show_pos(m, err->info.pos);
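/*
 * The resulting tracing/error_log entries look roughly like this
 * (illustrative):
 *
 *	[ 1234.567890] hist:sched:sched_switch: error: Duplicate field name
 *	  Command: keys=next_pid:vals=next_pid
 *	                              ^
 *
 * i.e. a timestamp, the location string, the static error text, then the
 * command with a caret placed err->info.pos characters into it.
 */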
7752 static const struct seq_operations tracing_err_log_seq_ops = {
7753 .start = tracing_err_log_seq_start,
7754 .next = tracing_err_log_seq_next,
7755 .stop = tracing_err_log_seq_stop,
7756 .show = tracing_err_log_seq_show
7759 static int tracing_err_log_open(struct inode *inode, struct file *file)
7761 struct trace_array *tr = inode->i_private;
7764 ret = tracing_check_open_get_tr(tr);
7768 /* If this file was opened for write, then erase contents */
7769 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7770 clear_tracing_err_log(tr);
7772 if (file->f_mode & FMODE_READ) {
7773 ret = seq_open(file, &tracing_err_log_seq_ops);
7775 struct seq_file *m = file->private_data;
7778 trace_array_put(tr);
7784 static ssize_t tracing_err_log_write(struct file *file,
7785 const char __user *buffer,
7786 size_t count, loff_t *ppos)
7791 static int tracing_err_log_release(struct inode *inode, struct file *file)
7793 struct trace_array *tr = inode->i_private;
7795 trace_array_put(tr);
7797 if (file->f_mode & FMODE_READ)
7798 seq_release(inode, file);
7803 static const struct file_operations tracing_err_log_fops = {
7804 .open = tracing_err_log_open,
7805 .write = tracing_err_log_write,
7807 .llseek = seq_lseek,
7808 .release = tracing_err_log_release,
7811 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7813 struct trace_array *tr = inode->i_private;
7814 struct ftrace_buffer_info *info;
7817 ret = tracing_check_open_get_tr(tr);
7821 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7823 trace_array_put(tr);
7827 mutex_lock(&trace_types_lock);
7830 info->iter.cpu_file = tracing_get_cpu(inode);
7831 info->iter.trace = tr->current_trace;
7832 info->iter.array_buffer = &tr->array_buffer;
7834 /* Force reading ring buffer for first read */
7835 info->read = (unsigned int)-1;
7837 filp->private_data = info;
7841 mutex_unlock(&trace_types_lock);
7843 ret = nonseekable_open(inode, filp);
7845 trace_array_put(tr);
7851 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7853 struct ftrace_buffer_info *info = filp->private_data;
7854 struct trace_iterator *iter = &info->iter;
7856 return trace_poll(iter, filp, poll_table);
7860 tracing_buffers_read(struct file *filp, char __user *ubuf,
7861 size_t count, loff_t *ppos)
7863 struct ftrace_buffer_info *info = filp->private_data;
7864 struct trace_iterator *iter = &info->iter;
7871 #ifdef CONFIG_TRACER_MAX_TRACE
7872 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7877 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7879 if (IS_ERR(info->spare)) {
7880 ret = PTR_ERR(info->spare);
7883 info->spare_cpu = iter->cpu_file;
7889 /* Do we have previous read data to read? */
7890 if (info->read < PAGE_SIZE)
7894 trace_access_lock(iter->cpu_file);
7895 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7899 trace_access_unlock(iter->cpu_file);
7902 if (trace_empty(iter)) {
7903 if ((filp->f_flags & O_NONBLOCK))
7906 ret = wait_on_pipe(iter, 0);
7917 size = PAGE_SIZE - info->read;
7921 ret = copy_to_user(ubuf, info->spare + info->read, size);
7933 static int tracing_buffers_release(struct inode *inode, struct file *file)
7935 struct ftrace_buffer_info *info = file->private_data;
7936 struct trace_iterator *iter = &info->iter;
7938 mutex_lock(&trace_types_lock);
7940 iter->tr->trace_ref--;
7942 __trace_array_put(iter->tr);
7945 ring_buffer_free_read_page(iter->array_buffer->buffer,
7946 info->spare_cpu, info->spare);
7949 mutex_unlock(&trace_types_lock);
7955 struct trace_buffer *buffer;
7958 refcount_t refcount;
7961 static void buffer_ref_release(struct buffer_ref *ref)
7963 if (!refcount_dec_and_test(&ref->refcount))
7965 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7969 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7970 struct pipe_buffer *buf)
7972 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7974 buffer_ref_release(ref);
7978 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7979 struct pipe_buffer *buf)
7981 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7983 if (refcount_read(&ref->refcount) > INT_MAX/2)
7986 refcount_inc(&ref->refcount);
7990 /* Pipe buffer operations for a buffer. */
7991 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7992 .release = buffer_pipe_buf_release,
7993 .get = buffer_pipe_buf_get,
7997 * Callback from splice_to_pipe(), if we need to release some pages
7998 * at the end of the spd in case we errored out while filling the pipe.
8000 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8002 struct buffer_ref *ref =
8003 (struct buffer_ref *)spd->partial[i].private;
8005 buffer_ref_release(ref);
8006 spd->partial[i].private = 0;
8010 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8011 struct pipe_inode_info *pipe, size_t len,
8014 struct ftrace_buffer_info *info = file->private_data;
8015 struct trace_iterator *iter = &info->iter;
8016 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8017 struct page *pages_def[PIPE_DEF_BUFFERS];
8018 struct splice_pipe_desc spd = {
8020 .partial = partial_def,
8021 .nr_pages_max = PIPE_DEF_BUFFERS,
8022 .ops = &buffer_pipe_buf_ops,
8023 .spd_release = buffer_spd_release,
8025 struct buffer_ref *ref;
8029 #ifdef CONFIG_TRACER_MAX_TRACE
8030 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8034 if (*ppos & (PAGE_SIZE - 1))
8037 if (len & (PAGE_SIZE - 1)) {
8038 if (len < PAGE_SIZE)
8043 if (splice_grow_spd(pipe, &spd))
8047 trace_access_lock(iter->cpu_file);
8048 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8050 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8054 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8060 refcount_set(&ref->refcount, 1);
8061 ref->buffer = iter->array_buffer->buffer;
8062 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8063 if (IS_ERR(ref->page)) {
8064 ret = PTR_ERR(ref->page);
8069 ref->cpu = iter->cpu_file;
8071 r = ring_buffer_read_page(ref->buffer, &ref->page,
8072 len, iter->cpu_file, 1);
8074 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8080 page = virt_to_page(ref->page);
8082 spd.pages[i] = page;
8083 spd.partial[i].len = PAGE_SIZE;
8084 spd.partial[i].offset = 0;
8085 spd.partial[i].private = (unsigned long)ref;
8089 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8092 trace_access_unlock(iter->cpu_file);
8095 /* did we read anything? */
8096 if (!spd.nr_pages) {
8101 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8104 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8111 ret = splice_to_pipe(pipe, &spd);
8113 splice_shrink_spd(&spd);
8118 static const struct file_operations tracing_buffers_fops = {
8119 .open = tracing_buffers_open,
8120 .read = tracing_buffers_read,
8121 .poll = tracing_buffers_poll,
8122 .release = tracing_buffers_release,
8123 .splice_read = tracing_buffers_splice_read,
8124 .llseek = no_llseek,
8128 tracing_stats_read(struct file *filp, char __user *ubuf,
8129 size_t count, loff_t *ppos)
8131 struct inode *inode = file_inode(filp);
8132 struct trace_array *tr = inode->i_private;
8133 struct array_buffer *trace_buf = &tr->array_buffer;
8134 int cpu = tracing_get_cpu(inode);
8135 struct trace_seq *s;
8137 unsigned long long t;
8138 unsigned long usec_rem;
8140 s = kmalloc(sizeof(*s), GFP_KERNEL);
8146 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8147 trace_seq_printf(s, "entries: %ld\n", cnt);
8149 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8150 trace_seq_printf(s, "overrun: %ld\n", cnt);
8152 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8153 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8155 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8156 trace_seq_printf(s, "bytes: %ld\n", cnt);
8158 if (trace_clocks[tr->clock_id].in_ns) {
8159 /* local or global for trace_clock */
8160 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8161 usec_rem = do_div(t, USEC_PER_SEC);
8162 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8165 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8166 usec_rem = do_div(t, USEC_PER_SEC);
8167 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8169 /* counter or tsc mode for trace_clock */
8170 trace_seq_printf(s, "oldest event ts: %llu\n",
8171 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8173 trace_seq_printf(s, "now ts: %llu\n",
8174 ring_buffer_time_stamp(trace_buf->buffer));
8177 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8178 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8180 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8181 trace_seq_printf(s, "read events: %ld\n", cnt);
8183 count = simple_read_from_buffer(ubuf, count, ppos,
8184 s->buffer, trace_seq_used(s));
8191 static const struct file_operations tracing_stats_fops = {
8192 .open = tracing_open_generic_tr,
8193 .read = tracing_stats_read,
8194 .llseek = generic_file_llseek,
8195 .release = tracing_release_generic_tr,
8198 #ifdef CONFIG_DYNAMIC_FTRACE
8201 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8202 size_t cnt, loff_t *ppos)
8208 /* 256 should be plenty to hold the amount needed */
8209 buf = kmalloc(256, GFP_KERNEL);
8213 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8214 ftrace_update_tot_cnt,
8215 ftrace_number_of_pages,
8216 ftrace_number_of_groups);
8218 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8223 static const struct file_operations tracing_dyn_info_fops = {
8224 .open = tracing_open_generic,
8225 .read = tracing_read_dyn_info,
8226 .llseek = generic_file_llseek,
8228 #endif /* CONFIG_DYNAMIC_FTRACE */
8230 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8232 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8233 struct trace_array *tr, struct ftrace_probe_ops *ops,
8236 tracing_snapshot_instance(tr);
8240 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8241 struct trace_array *tr, struct ftrace_probe_ops *ops,
8244 struct ftrace_func_mapper *mapper = data;
8248 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8258 tracing_snapshot_instance(tr);
8262 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8263 struct ftrace_probe_ops *ops, void *data)
8265 struct ftrace_func_mapper *mapper = data;
8268 seq_printf(m, "%ps:", (void *)ip);
8270 seq_puts(m, "snapshot");
8273 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8276 seq_printf(m, ":count=%ld\n", *count);
8278 seq_puts(m, ":unlimited\n");
8284 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8285 unsigned long ip, void *init_data, void **data)
8287 struct ftrace_func_mapper *mapper = *data;
8290 mapper = allocate_ftrace_func_mapper();
8296 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8300 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8301 unsigned long ip, void *data)
8303 struct ftrace_func_mapper *mapper = data;
8308 free_ftrace_func_mapper(mapper, NULL);
8312 ftrace_func_mapper_remove_ip(mapper, ip);
8315 static struct ftrace_probe_ops snapshot_probe_ops = {
8316 .func = ftrace_snapshot,
8317 .print = ftrace_snapshot_print,
8320 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8321 .func = ftrace_count_snapshot,
8322 .print = ftrace_snapshot_print,
8323 .init = ftrace_snapshot_init,
8324 .free = ftrace_snapshot_free,
8328 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8329 char *glob, char *cmd, char *param, int enable)
8331 struct ftrace_probe_ops *ops;
8332 void *count = (void *)-1;
8339 /* hash funcs only work with set_ftrace_filter */
8343 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8346 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8351 number = strsep(¶m, ":");
8353 if (!strlen(number))
8357 * We use the callback data field (which is a pointer) as our counter.
8360 ret = kstrtoul(number, 0, (unsigned long *)&count);
8365 ret = tracing_alloc_snapshot_instance(tr);
8369 ret = register_ftrace_function_probe(glob, tr, ops, count);
8372 return ret < 0 ? ret : 0;
8375 static struct ftrace_func_command ftrace_snapshot_cmd = {
8377 .func = ftrace_trace_snapshot_callback,
8380 static __init int register_snapshot_cmd(void)
8382 return register_ftrace_command(&ftrace_snapshot_cmd);
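/*
 * For reference, the "snapshot" command registered above is driven through
 * set_ftrace_filter (illustrative, per Documentation/trace/ftrace.rst):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *	echo 'schedule:snapshot:1' > set_ftrace_filter    # snapshot only once
 *	echo '!schedule:snapshot' >> set_ftrace_filter    # remove the probe
 *
 * A ":count" parameter selects snapshot_count_probe_ops, otherwise the
 * unlimited snapshot_probe_ops is used.
 */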
8385 static inline __init int register_snapshot_cmd(void) { return 0; }
8386 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8388 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8390 if (WARN_ON(!tr->dir))
8391 return ERR_PTR(-ENODEV);
8393 /* Top directory uses NULL as the parent */
8394 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8397 /* All sub buffers have a descriptor */
8401 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8403 struct dentry *d_tracer;
8406 return tr->percpu_dir;
8408 d_tracer = tracing_get_dentry(tr);
8409 if (IS_ERR(d_tracer))
8412 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8414 MEM_FAIL(!tr->percpu_dir,
8415 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8417 return tr->percpu_dir;
8420 static struct dentry *
8421 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8422 void *data, long cpu, const struct file_operations *fops)
8424 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8426 if (ret) /* See tracing_get_cpu() */
8427 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8432 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8434 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8435 struct dentry *d_cpu;
8436 char cpu_dir[30]; /* 30 characters should be more than enough */
8441 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8442 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8444 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8448 /* per cpu trace_pipe */
8449 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8450 tr, cpu, &tracing_pipe_fops);
8453 trace_create_cpu_file("trace", 0644, d_cpu,
8454 tr, cpu, &tracing_fops);
8456 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8457 tr, cpu, &tracing_buffers_fops);
8459 trace_create_cpu_file("stats", 0444, d_cpu,
8460 tr, cpu, &tracing_stats_fops);
8462 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8463 tr, cpu, &tracing_entries_fops);
8465 #ifdef CONFIG_TRACER_SNAPSHOT
8466 trace_create_cpu_file("snapshot", 0644, d_cpu,
8467 tr, cpu, &snapshot_fops);
8469 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8470 tr, cpu, &snapshot_raw_fops);
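/*
 * The files created above give each CPU a directory of the form below
 * (the snapshot entries only when CONFIG_TRACER_SNAPSHOT is enabled):
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot
 *	per_cpu/cpu0/snapshot_raw
 */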
8474 #ifdef CONFIG_FTRACE_SELFTEST
8475 /* Let selftest have access to static functions in this file */
8476 #include "trace_selftest.c"
8480 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8483 struct trace_option_dentry *topt = filp->private_data;
8486 if (topt->flags->val & topt->opt->bit)
8491 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8495 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8498 struct trace_option_dentry *topt = filp->private_data;
8502 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8506 if (val != 0 && val != 1)
8509 if (!!(topt->flags->val & topt->opt->bit) != val) {
8510 mutex_lock(&trace_types_lock);
8511 ret = __set_tracer_option(topt->tr, topt->flags,
8513 mutex_unlock(&trace_types_lock);
8524 static const struct file_operations trace_options_fops = {
8525 .open = tracing_open_generic,
8526 .read = trace_options_read,
8527 .write = trace_options_write,
8528 .llseek = generic_file_llseek,
8532 * In order to pass in both the trace_array descriptor as well as the index
8533 * to the flag that the trace option file represents, the trace_array
8534 * has a character array of trace_flags_index[], which holds the index
8535 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8536 * The address of this character array is passed to the flag option file
8537 * read/write callbacks.
8539 * In order to extract both the index and the trace_array descriptor,
8540 * get_tr_index() uses the following algorithm.
8544 * As the pointer itself contains the address of the index (remember index[1] == 1).
8547 * Then, to get the trace_array descriptor, we subtract that index
8548 * from the ptr, which gets us to the start of the index array itself.
8550 * ptr - idx == &index[0]
8552 * Then a simple container_of() from that pointer gets us to the
8553 * trace_array descriptor.
8555 static void get_tr_index(void *data, struct trace_array **ptr,
8556 unsigned int *pindex)
8558 *pindex = *(unsigned char *)data;
8560 *ptr = container_of(data - *pindex, struct trace_array, trace_flags_index);
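/*
 * Worked example of the scheme described above: the option file for flag
 * bit 3 is created with data = &tr->trace_flags_index[3] (see
 * create_trace_option_core_file() below). Then:
 *
 *	idx = *(unsigned char *)data;			== 3
 *	data - idx;					== &tr->trace_flags_index[0]
 *	container_of(..., trace_flags_index);		== tr
 */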
8565 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8568 void *tr_index = filp->private_data;
8569 struct trace_array *tr;
8573 get_tr_index(tr_index, &tr, &index);
8575 if (tr->trace_flags & (1 << index))
8580 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8584 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8587 void *tr_index = filp->private_data;
8588 struct trace_array *tr;
8593 get_tr_index(tr_index, &tr, &index);
8595 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8599 if (val != 0 && val != 1)
8602 mutex_lock(&event_mutex);
8603 mutex_lock(&trace_types_lock);
8604 ret = set_tracer_flag(tr, 1 << index, val);
8605 mutex_unlock(&trace_types_lock);
8606 mutex_unlock(&event_mutex);
8616 static const struct file_operations trace_options_core_fops = {
8617 .open = tracing_open_generic,
8618 .read = trace_options_core_read,
8619 .write = trace_options_core_write,
8620 .llseek = generic_file_llseek,
8623 struct dentry *trace_create_file(const char *name,
8625 struct dentry *parent,
8627 const struct file_operations *fops)
8631 ret = tracefs_create_file(name, mode, parent, data, fops);
8633 pr_warn("Could not create tracefs '%s' entry\n", name);
8639 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8641 struct dentry *d_tracer;
8646 d_tracer = tracing_get_dentry(tr);
8647 if (IS_ERR(d_tracer))
8650 tr->options = tracefs_create_dir("options", d_tracer);
8652 pr_warn("Could not create tracefs directory 'options'\n");
8660 create_trace_option_file(struct trace_array *tr,
8661 struct trace_option_dentry *topt,
8662 struct tracer_flags *flags,
8663 struct tracer_opt *opt)
8665 struct dentry *t_options;
8667 t_options = trace_options_init_dentry(tr);
8671 topt->flags = flags;
8675 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8676 &trace_options_fops);
8681 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8683 struct trace_option_dentry *topts;
8684 struct trace_options *tr_topts;
8685 struct tracer_flags *flags;
8686 struct tracer_opt *opts;
8693 flags = tracer->flags;
8695 if (!flags || !flags->opts)
8699 * If this is an instance, only create flags for tracers
8700 * the instance may have.
8702 if (!trace_ok_for_array(tracer, tr))
8705 for (i = 0; i < tr->nr_topts; i++) {
8706 /* Make sure there are no duplicate flags. */
8707 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8713 for (cnt = 0; opts[cnt].name; cnt++)
8716 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8720 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8727 tr->topts = tr_topts;
8728 tr->topts[tr->nr_topts].tracer = tracer;
8729 tr->topts[tr->nr_topts].topts = topts;
8732 for (cnt = 0; opts[cnt].name; cnt++) {
8733 create_trace_option_file(tr, &topts[cnt], flags,
8735 MEM_FAIL(topts[cnt].entry == NULL,
8736 "Failed to create trace option: %s",
8741 static struct dentry *
8742 create_trace_option_core_file(struct trace_array *tr,
8743 const char *option, long index)
8745 struct dentry *t_options;
8747 t_options = trace_options_init_dentry(tr);
8751 return trace_create_file(option, 0644, t_options,
8752 (void *)&tr->trace_flags_index[index],
8753 &trace_options_core_fops);
8756 static void create_trace_options_dir(struct trace_array *tr)
8758 struct dentry *t_options;
8759 bool top_level = tr == &global_trace;
8762 t_options = trace_options_init_dentry(tr);
8766 for (i = 0; trace_options[i]; i++) {
8768 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8769 create_trace_option_core_file(tr, trace_options[i], i);
8774 rb_simple_read(struct file *filp, char __user *ubuf,
8775 size_t cnt, loff_t *ppos)
8777 struct trace_array *tr = filp->private_data;
8781 r = tracer_tracing_is_on(tr);
8782 r = sprintf(buf, "%d\n", r);
8784 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8788 rb_simple_write(struct file *filp, const char __user *ubuf,
8789 size_t cnt, loff_t *ppos)
8791 struct trace_array *tr = filp->private_data;
8792 struct trace_buffer *buffer = tr->array_buffer.buffer;
8796 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8801 mutex_lock(&trace_types_lock);
8802 if (!!val == tracer_tracing_is_on(tr)) {
8803 val = 0; /* do nothing */
8805 tracer_tracing_on(tr);
8806 if (tr->current_trace->start)
8807 tr->current_trace->start(tr);
8809 tracer_tracing_off(tr);
8810 if (tr->current_trace->stop)
8811 tr->current_trace->stop(tr);
8813 mutex_unlock(&trace_types_lock);
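/*
 * This write handler backs the "tracing_on" file, e.g. (illustrative):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on		# pause recording
 *	echo 1 > /sys/kernel/tracing/tracing_on		# resume recording
 *
 * Writing the state the buffer already has is accepted and does nothing,
 * as handled above.
 */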
8821 static const struct file_operations rb_simple_fops = {
8822 .open = tracing_open_generic_tr,
8823 .read = rb_simple_read,
8824 .write = rb_simple_write,
8825 .release = tracing_release_generic_tr,
8826 .llseek = default_llseek,
8830 buffer_percent_read(struct file *filp, char __user *ubuf,
8831 size_t cnt, loff_t *ppos)
8833 struct trace_array *tr = filp->private_data;
8837 r = tr->buffer_percent;
8838 r = sprintf(buf, "%d\n", r);
8840 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8844 buffer_percent_write(struct file *filp, const char __user *ubuf,
8845 size_t cnt, loff_t *ppos)
8847 struct trace_array *tr = filp->private_data;
8851 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8861 tr->buffer_percent = val;
8868 static const struct file_operations buffer_percent_fops = {
8869 .open = tracing_open_generic_tr,
8870 .read = buffer_percent_read,
8871 .write = buffer_percent_write,
8872 .release = tracing_release_generic_tr,
8873 .llseek = default_llseek,
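/*
 * buffer_percent controls how full a per-CPU buffer must be before blocked
 * readers are woken: 0 means wake as soon as any data is present, 100 means
 * wait until the buffer is full. See the wait_on_pipe(iter,
 * iter->tr->buffer_percent) call in tracing_buffers_splice_read() above.
 */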
8876 static struct dentry *trace_instance_dir;
8879 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8882 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8884 enum ring_buffer_flags rb_flags;
8886 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8890 buf->buffer = ring_buffer_alloc(size, rb_flags);
8894 buf->data = alloc_percpu(struct trace_array_cpu);
8896 ring_buffer_free(buf->buffer);
8901 /* Allocate the first page for all buffers */
8902 set_buffer_entries(&tr->array_buffer,
8903 ring_buffer_size(tr->array_buffer.buffer, 0));
8908 static int allocate_trace_buffers(struct trace_array *tr, int size)
8912 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8916 #ifdef CONFIG_TRACER_MAX_TRACE
8917 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8918 allocate_snapshot ? size : 1);
8919 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8920 ring_buffer_free(tr->array_buffer.buffer);
8921 tr->array_buffer.buffer = NULL;
8922 free_percpu(tr->array_buffer.data);
8923 tr->array_buffer.data = NULL;
8926 tr->allocated_snapshot = allocate_snapshot;
8929 * Only the top level trace array gets its snapshot allocated
8930 * from the kernel command line.
8932 allocate_snapshot = false;
8938 static void free_trace_buffer(struct array_buffer *buf)
8941 ring_buffer_free(buf->buffer);
8943 free_percpu(buf->data);
8948 static void free_trace_buffers(struct trace_array *tr)
8953 free_trace_buffer(&tr->array_buffer);
8955 #ifdef CONFIG_TRACER_MAX_TRACE
8956 free_trace_buffer(&tr->max_buffer);
8960 static void init_trace_flags_index(struct trace_array *tr)
8964 /* Used by the trace options files */
8965 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8966 tr->trace_flags_index[i] = i;
8969 static void __update_tracer_options(struct trace_array *tr)
8973 for (t = trace_types; t; t = t->next)
8974 add_tracer_options(tr, t);
8977 static void update_tracer_options(struct trace_array *tr)
8979 mutex_lock(&trace_types_lock);
8980 __update_tracer_options(tr);
8981 mutex_unlock(&trace_types_lock);
8984 /* Must have trace_types_lock held */
8985 struct trace_array *trace_array_find(const char *instance)
8987 struct trace_array *tr, *found = NULL;
8989 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8990 if (tr->name && strcmp(tr->name, instance) == 0) {
8999 struct trace_array *trace_array_find_get(const char *instance)
9001 struct trace_array *tr;
9003 mutex_lock(&trace_types_lock);
9004 tr = trace_array_find(instance);
9007 mutex_unlock(&trace_types_lock);
9012 static int trace_array_create_dir(struct trace_array *tr)
9016 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9020 ret = event_trace_add_tracer(tr->dir, tr);
9022 tracefs_remove(tr->dir);
9026 init_tracer_tracefs(tr, tr->dir);
9027 __update_tracer_options(tr);
9032 static struct trace_array *trace_array_create(const char *name)
9034 struct trace_array *tr;
9038 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9040 return ERR_PTR(ret);
9042 tr->name = kstrdup(name, GFP_KERNEL);
9046 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9049 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9051 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9053 raw_spin_lock_init(&tr->start_lock);
9055 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9057 tr->current_trace = &nop_trace;
9059 INIT_LIST_HEAD(&tr->systems);
9060 INIT_LIST_HEAD(&tr->events);
9061 INIT_LIST_HEAD(&tr->hist_vars);
9062 INIT_LIST_HEAD(&tr->err_log);
9064 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9067 if (ftrace_allocate_ftrace_ops(tr) < 0)
9070 ftrace_init_trace_array(tr);
9072 init_trace_flags_index(tr);
9074 if (trace_instance_dir) {
9075 ret = trace_array_create_dir(tr);
9079 __trace_early_add_events(tr);
9081 list_add(&tr->list, &ftrace_trace_arrays);
9088 ftrace_free_ftrace_ops(tr);
9089 free_trace_buffers(tr);
9090 free_cpumask_var(tr->tracing_cpumask);
9094 return ERR_PTR(ret);
9097 static int instance_mkdir(const char *name)
9099 struct trace_array *tr;
9102 mutex_lock(&event_mutex);
9103 mutex_lock(&trace_types_lock);
9106 if (trace_array_find(name))
9109 tr = trace_array_create(name);
9111 ret = PTR_ERR_OR_ZERO(tr);
9114 mutex_unlock(&trace_types_lock);
9115 mutex_unlock(&event_mutex);
9120 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9121 * @name: The name of the trace array to be looked up/created.
9123 * Returns pointer to trace array with given name.
9124 * NULL, if it cannot be created.
9126 * NOTE: This function increments the reference counter associated with the
9127 * trace array returned. This makes sure it cannot be freed while in use.
9128 * Use trace_array_put() once the trace array is no longer needed.
9129 * If the trace_array is to be freed, trace_array_destroy() needs to
9130 * be called after the trace_array_put(), or simply let user space delete
9131 * it from the tracefs instances directory. But until the
9132 * trace_array_put() is called, user space can not delete it.
9135 struct trace_array *trace_array_get_by_name(const char *name)
9137 struct trace_array *tr;
9139 mutex_lock(&event_mutex);
9140 mutex_lock(&trace_types_lock);
9142 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9143 if (tr->name && strcmp(tr->name, name) == 0)
9147 tr = trace_array_create(name);
9155 mutex_unlock(&trace_types_lock);
9156 mutex_unlock(&event_mutex);
9159 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
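/*
 * Typical in-kernel usage sketch; "sample-instance" is just a made-up
 * instance name (compare samples/ftrace/sample-trace-array.c):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("sample-instance");
 *	if (!tr)
 *		return -1;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should go away
 */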
9161 static int __remove_instance(struct trace_array *tr)
9165 /* Reference counter for a newly created trace array = 1. */
9166 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9169 list_del(&tr->list);
9171 /* Disable all the flags that were enabled coming in */
9172 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9173 if ((1 << i) & ZEROED_TRACE_FLAGS)
9174 set_tracer_flag(tr, 1 << i, 0);
9177 tracing_set_nop(tr);
9178 clear_ftrace_function_probes(tr);
9179 event_trace_del_tracer(tr);
9180 ftrace_clear_pids(tr);
9181 ftrace_destroy_function_files(tr);
9182 tracefs_remove(tr->dir);
9183 free_percpu(tr->last_func_repeats);
9184 free_trace_buffers(tr);
9186 for (i = 0; i < tr->nr_topts; i++) {
9187 kfree(tr->topts[i].topts);
9191 free_cpumask_var(tr->tracing_cpumask);
9198 int trace_array_destroy(struct trace_array *this_tr)
9200 struct trace_array *tr;
9206 mutex_lock(&event_mutex);
9207 mutex_lock(&trace_types_lock);
9211 /* Making sure trace array exists before destroying it. */
9212 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9213 if (tr == this_tr) {
9214 ret = __remove_instance(tr);
9219 mutex_unlock(&trace_types_lock);
9220 mutex_unlock(&event_mutex);
9224 EXPORT_SYMBOL_GPL(trace_array_destroy);
9226 static int instance_rmdir(const char *name)
9228 struct trace_array *tr;
9231 mutex_lock(&event_mutex);
9232 mutex_lock(&trace_types_lock);
9235 tr = trace_array_find(name);
9237 ret = __remove_instance(tr);
9239 mutex_unlock(&trace_types_lock);
9240 mutex_unlock(&event_mutex);
9245 static __init void create_trace_instances(struct dentry *d_tracer)
9247 struct trace_array *tr;
9249 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9252 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9255 mutex_lock(&event_mutex);
9256 mutex_lock(&trace_types_lock);
9258 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9261 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9262 "Failed to create instance directory\n"))
9266 mutex_unlock(&trace_types_lock);
9267 mutex_unlock(&event_mutex);
9271 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9273 struct trace_event_file *file;
9276 trace_create_file("available_tracers", 0444, d_tracer,
9277 tr, &show_traces_fops);
9279 trace_create_file("current_tracer", 0644, d_tracer,
9280 tr, &set_tracer_fops);
9282 trace_create_file("tracing_cpumask", 0644, d_tracer,
9283 tr, &tracing_cpumask_fops);
9285 trace_create_file("trace_options", 0644, d_tracer,
9286 tr, &tracing_iter_fops);
9288 trace_create_file("trace", 0644, d_tracer,
9291 trace_create_file("trace_pipe", 0444, d_tracer,
9292 tr, &tracing_pipe_fops);
9294 trace_create_file("buffer_size_kb", 0644, d_tracer,
9295 tr, &tracing_entries_fops);
9297 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9298 tr, &tracing_total_entries_fops);
9300 trace_create_file("free_buffer", 0200, d_tracer,
9301 tr, &tracing_free_buffer_fops);
9303 trace_create_file("trace_marker", 0220, d_tracer,
9304 tr, &tracing_mark_fops);
9306 file = __find_event_file(tr, "ftrace", "print");
9307 if (file && file->dir)
9308 trace_create_file("trigger", 0644, file->dir, file,
9309 &event_trigger_fops);
9310 tr->trace_marker_file = file;
9312 trace_create_file("trace_marker_raw", 0220, d_tracer,
9313 tr, &tracing_mark_raw_fops);
9315 trace_create_file("trace_clock", 0644, d_tracer, tr,
9318 trace_create_file("tracing_on", 0644, d_tracer,
9319 tr, &rb_simple_fops);
9321 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9322 &trace_time_stamp_mode_fops);
9324 tr->buffer_percent = 50;
9326 trace_create_file("buffer_percent", 0444, d_tracer,
9327 tr, &buffer_percent_fops);
9329 create_trace_options_dir(tr);
9331 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9332 trace_create_maxlat_file(tr, d_tracer);
9335 if (ftrace_create_function_files(tr, d_tracer))
9336 MEM_FAIL(1, "Could not allocate function filter files");
9338 #ifdef CONFIG_TRACER_SNAPSHOT
9339 trace_create_file("snapshot", 0644, d_tracer,
9340 tr, &snapshot_fops);
9343 trace_create_file("error_log", 0644, d_tracer,
9344 tr, &tracing_err_log_fops);
9346 for_each_tracing_cpu(cpu)
9347 tracing_init_tracefs_percpu(tr, cpu);
9349 ftrace_init_tracefs(tr, d_tracer);
9352 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9354 struct vfsmount *mnt;
9355 struct file_system_type *type;
9358 * To maintain backward compatibility for tools that mount
9359 * debugfs to get to the tracing facility, tracefs is automatically
9360 * mounted to the debugfs/tracing directory.
9362 type = get_fs_type("tracefs");
9365 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9366 put_filesystem(type);
9375 * tracing_init_dentry - initialize top level trace array
9377 * This is called when creating files or directories in the tracing
9378 * directory. It is called via fs_initcall() by any of the boot up code
9379 * and expects to return the dentry of the top level tracing directory.
9381 int tracing_init_dentry(void)
9383 struct trace_array *tr = &global_trace;
9385 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9386 pr_warn("Tracing disabled due to lockdown\n");
9390 /* The top level trace array uses NULL as parent */
9394 if (WARN_ON(!tracefs_initialized()))
9398 * As there may still be users that expect the tracing
9399 * files to exist in debugfs/tracing, we must automount
9400 * the tracefs file system there, so older tools still
9401 * work with the newer kernel.
9403 tr->dir = debugfs_create_automount("tracing", NULL,
9404 trace_automount, NULL);
9409 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9410 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9412 static struct workqueue_struct *eval_map_wq __initdata;
9413 static struct work_struct eval_map_work __initdata;
9415 static void __init eval_map_work_func(struct work_struct *work)
9419 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9420 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9423 static int __init trace_eval_init(void)
9425 INIT_WORK(&eval_map_work, eval_map_work_func);
9427 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9429 pr_err("Unable to allocate eval_map_wq\n");
9431 eval_map_work_func(&eval_map_work);
9435 queue_work(eval_map_wq, &eval_map_work);
9439 static int __init trace_eval_sync(void)
9441 /* Make sure the eval map updates are finished */
9443 destroy_workqueue(eval_map_wq);
9447 late_initcall_sync(trace_eval_sync);
9450 #ifdef CONFIG_MODULES
9451 static void trace_module_add_evals(struct module *mod)
9453 if (!mod->num_trace_evals)
9457 * Modules with bad taint do not have events created, do
9458 * not bother with enums either.
9460 if (trace_module_has_bad_taint(mod))
9463 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9466 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9467 static void trace_module_remove_evals(struct module *mod)
9469 union trace_eval_map_item *map;
9470 union trace_eval_map_item **last = &trace_eval_maps;
9472 if (!mod->num_trace_evals)
9475 mutex_lock(&trace_eval_mutex);
9477 map = trace_eval_maps;
9480 if (map->head.mod == mod)
9482 map = trace_eval_jmp_to_tail(map);
9483 last = &map->tail.next;
9484 map = map->tail.next;
9489 *last = trace_eval_jmp_to_tail(map)->tail.next;
9492 mutex_unlock(&trace_eval_mutex);
9495 static inline void trace_module_remove_evals(struct module *mod) { }
9496 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9498 static int trace_module_notify(struct notifier_block *self,
9499 unsigned long val, void *data)
9501 struct module *mod = data;
9504 case MODULE_STATE_COMING:
9505 trace_module_add_evals(mod);
9507 case MODULE_STATE_GOING:
9508 trace_module_remove_evals(mod);
9515 static struct notifier_block trace_module_nb = {
9516 .notifier_call = trace_module_notify,
9519 #endif /* CONFIG_MODULES */
9521 static __init int tracer_init_tracefs(void)
9525 trace_access_lock_init();
9527 ret = tracing_init_dentry();
9533 init_tracer_tracefs(&global_trace, NULL);
9534 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9536 trace_create_file("tracing_thresh", 0644, NULL,
9537 &global_trace, &tracing_thresh_fops);
9539 trace_create_file("README", 0444, NULL,
9540 NULL, &tracing_readme_fops);
9542 trace_create_file("saved_cmdlines", 0444, NULL,
9543 NULL, &tracing_saved_cmdlines_fops);
9545 trace_create_file("saved_cmdlines_size", 0644, NULL,
9546 NULL, &tracing_saved_cmdlines_size_fops);
9548 trace_create_file("saved_tgids", 0444, NULL,
9549 NULL, &tracing_saved_tgids_fops);
9553 trace_create_eval_file(NULL);
9555 #ifdef CONFIG_MODULES
9556 register_module_notifier(&trace_module_nb);
9559 #ifdef CONFIG_DYNAMIC_FTRACE
9560 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9561 NULL, &tracing_dyn_info_fops);
9564 create_trace_instances(NULL);
9566 update_tracer_options(&global_trace);
9571 static int trace_panic_handler(struct notifier_block *this,
9572 unsigned long event, void *unused)
9574 if (ftrace_dump_on_oops)
9575 ftrace_dump(ftrace_dump_on_oops);
9579 static struct notifier_block trace_panic_notifier = {
9580 .notifier_call = trace_panic_handler,
9582 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9585 static int trace_die_handler(struct notifier_block *self,
9591 if (ftrace_dump_on_oops)
9592 ftrace_dump(ftrace_dump_on_oops);
9600 static struct notifier_block trace_die_notifier = {
9601 .notifier_call = trace_die_handler,
9606 * printk is set to max of 1024, we really don't need it that big.
9607 * Nothing should be printing 1000 characters anyway.
9609 #define TRACE_MAX_PRINT 1000
9612 * Define here KERN_TRACE so that we have one place to modify
9613 * it if we decide to change what log level the ftrace dump
9616 #define KERN_TRACE KERN_EMERG
9619 trace_printk_seq(struct trace_seq *s)
9621 /* Probably should print a warning here. */
9622 if (s->seq.len >= TRACE_MAX_PRINT)
9623 s->seq.len = TRACE_MAX_PRINT;
9626 * More paranoid code. Although the buffer size is set to
9627 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9628 * an extra layer of protection.
9630 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9631 s->seq.len = s->seq.size - 1;
9633 /* should already be NUL terminated, but we are paranoid. */
9634 s->buffer[s->seq.len] = 0;
9636 printk(KERN_TRACE "%s", s->buffer);
9641 void trace_init_global_iter(struct trace_iterator *iter)
9643 iter->tr = &global_trace;
9644 iter->trace = iter->tr->current_trace;
9645 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9646 iter->array_buffer = &global_trace.array_buffer;
9648 if (iter->trace && iter->trace->open)
9649 iter->trace->open(iter);
9651 /* Annotate start of buffers if we had overruns */
9652 if (ring_buffer_overruns(iter->array_buffer->buffer))
9653 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9655 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9656 if (trace_clocks[iter->tr->clock_id].in_ns)
9657 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9660 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9662 /* use static because iter can be a bit big for the stack */
9663 static struct trace_iterator iter;
9664 static atomic_t dump_running;
9665 struct trace_array *tr = &global_trace;
9666 unsigned int old_userobj;
9667 unsigned long flags;
9670 /* Only allow one dump user at a time. */
9671 if (atomic_inc_return(&dump_running) != 1) {
9672 atomic_dec(&dump_running);
9677 * Always turn off tracing when we dump.
9678 * We don't need to show trace output of what happens
9679 * between multiple crashes.
9681 * If the user does a sysrq-z, then they can re-enable
9682 * tracing with echo 1 > tracing_on.
9686 local_irq_save(flags);
9687 printk_nmi_direct_enter();
9689 /* Simulate the iterator */
9690 trace_init_global_iter(&iter);
9691 /* Can not use kmalloc for iter.temp and iter.fmt */
9692 iter.temp = static_temp_buf;
9693 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9694 iter.fmt = static_fmt_buf;
9695 iter.fmt_size = STATIC_FMT_BUF_SIZE;
9697 for_each_tracing_cpu(cpu) {
9698 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9701 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9703 /* don't look at user memory in panic mode */
9704 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9706 switch (oops_dump_mode) {
9708 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9711 iter.cpu_file = raw_smp_processor_id();
9716 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9717 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9720 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9722 /* Did function tracer already get disabled? */
9723 if (ftrace_is_dead()) {
9724 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9725 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9729 * We need to stop all tracing on all CPUs to read
9730 * the next buffer. This is a bit expensive, but is
9731 * not done often. We read all that we can,
9732 * and then release the locks again.
9735 while (!trace_empty(&iter)) {
9738 printk(KERN_TRACE "---------------------------------\n");
9742 trace_iterator_reset(&iter);
9743 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9745 if (trace_find_next_entry_inc(&iter) != NULL) {
9748 ret = print_trace_line(&iter);
9749 if (ret != TRACE_TYPE_NO_CONSUME)
9750 trace_consume(&iter);
9752 touch_nmi_watchdog();
9754 trace_printk_seq(&iter.seq);
9758 printk(KERN_TRACE " (ftrace buffer empty)\n");
9760 printk(KERN_TRACE "---------------------------------\n");
9763 tr->trace_flags |= old_userobj;
9765 for_each_tracing_cpu(cpu) {
9766 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9768 atomic_dec(&dump_running);
9769 printk_nmi_direct_exit();
9770 local_irq_restore(flags);
9772 EXPORT_SYMBOL_GPL(ftrace_dump);
9774 #define WRITE_BUFSIZE 4096
9776 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9777 size_t count, loff_t *ppos,
9778 int (*createfn)(const char *))
9780 char *kbuf, *buf, *tmp;
9785 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9789 while (done < count) {
9790 size = count - done;
9792 if (size >= WRITE_BUFSIZE)
9793 size = WRITE_BUFSIZE - 1;
9795 if (copy_from_user(kbuf, buffer + done, size)) {
9802 tmp = strchr(buf, '\n');
9805 size = tmp - buf + 1;
9808 if (done + size < count) {
9811 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9812 pr_warn("Line length is too long: Should be less than %d\n",
9820 /* Remove comments */
9821 tmp = strchr(buf, '#');
9826 ret = createfn(buf);
9831 } while (done < count);
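/*
 * Sketch of a caller, with hypothetical names (this is how the kprobe_events
 * and uprobe_events write handlers use this helper):
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		... parse one newline-terminated, '#'-stripped command ...
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */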
9841 __init static int tracer_alloc_buffers(void)
9847 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9848 pr_warn("Tracing disabled due to lockdown\n");
9853 * Make sure we don't accidentally add more trace options
9854 * than we have bits for.
9856 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9858 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9861 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9862 goto out_free_buffer_mask;
9864 /* Only allocate trace_printk buffers if a trace_printk exists */
9865 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9866 /* Must be called before global_trace.buffer is allocated */
9867 trace_printk_init_buffers();
9869 /* To save memory, keep the ring buffer size to its minimum */
9870 if (ring_buffer_expanded)
9871 ring_buf_size = trace_buf_size;
9875 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9876 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9878 raw_spin_lock_init(&global_trace.start_lock);
9881 * The prepare callback allocates some memory for the ring buffer. We
9882 * don't free the buffer if the CPU goes down. If we were to free
9883 * the buffer, then the user would lose any trace that was in the
9884 * buffer. The memory will be removed once the "instance" is removed.
9886 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9887 "trace/RB:prepare", trace_rb_cpu_prepare,
9890 goto out_free_cpumask;
9891 /* Used for event triggers */
9893 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9895 goto out_rm_hp_state;
9897 if (trace_create_savedcmd() < 0)
9898 goto out_free_temp_buffer;
9900 /* TODO: make the number of buffers hot pluggable with CPUS */
9901 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9902 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9903 goto out_free_savedcmd;
9906 if (global_trace.buffer_disabled)
9909 if (trace_boot_clock) {
9910 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9912 pr_warn("Trace clock %s not defined, going back to default\n",
9917 * register_tracer() might reference current_trace, so it
9918 * needs to be set before we register anything. This is
9919 * just a bootstrap of current_trace anyway.
9921 global_trace.current_trace = &nop_trace;
9923 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9925 ftrace_init_global_array_ops(&global_trace);
9927 init_trace_flags_index(&global_trace);
9929 register_tracer(&nop_trace);
9931 /* Function tracing may start here (via kernel command line) */
9932 init_function_trace();
9934 /* All seems OK, enable tracing */
9935 tracing_disabled = 0;
9937 atomic_notifier_chain_register(&panic_notifier_list,
9938 &trace_panic_notifier);
9940 register_die_notifier(&trace_die_notifier);
9942 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9944 INIT_LIST_HEAD(&global_trace.systems);
9945 INIT_LIST_HEAD(&global_trace.events);
9946 INIT_LIST_HEAD(&global_trace.hist_vars);
9947 INIT_LIST_HEAD(&global_trace.err_log);
9948 list_add(&global_trace.list, &ftrace_trace_arrays);
9950 apply_trace_boot_options();
9952 register_snapshot_cmd();
9959 free_saved_cmdlines_buffer(savedcmd);
9960 out_free_temp_buffer:
9961 ring_buffer_free(temp_buffer);
9963 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9965 free_cpumask_var(global_trace.tracing_cpumask);
9966 out_free_buffer_mask:
9967 free_cpumask_var(tracing_buffer_mask);
9972 void __init early_trace_init(void)
9974 if (tracepoint_printk) {
9975 tracepoint_print_iter =
9976 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9977 if (MEM_FAIL(!tracepoint_print_iter,
9978 "Failed to allocate trace iterator\n"))
9979 tracepoint_printk = 0;
9981 static_key_enable(&tracepoint_printk_key.key);
9983 tracer_alloc_buffers();
9986 void __init trace_init(void)
9991 __init static int clear_boot_tracer(void)
9994 * The default bootup tracer name is stored in an init section buffer.
9995 * This function is called at late_initcall time. If we did not
9996 * find the boot tracer, then clear it out, to prevent
9997 * later registration from accessing the buffer that is
9998 * about to be freed.
10000 if (!default_bootup_tracer)
10003 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10004 default_bootup_tracer);
10005 default_bootup_tracer = NULL;
10010 fs_initcall(tracer_init_tracefs);
10011 late_initcall_sync(clear_boot_tracer);
10013 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10014 __init static int tracing_set_default_clock(void)
10016 /* sched_clock_stable() is determined in late_initcall */
10017 if (!trace_boot_clock && !sched_clock_stable()) {
10018 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10019 pr_warn("Can not set tracing clock due to lockdown\n");
10023 printk(KERN_WARNING
10024 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10025 "If you want to keep using the local clock, then add:\n"
10026 " \"trace_clock=local\"\n"
10027 "on the kernel command line\n");
10028 tracing_set_clock(&global_trace, "global");
10033 late_initcall_sync(tracing_set_default_clock);