4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system-wide, etc. Default format is loosely strace-like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/event.h"
25 #include "util/evlist.h"
26 #include <subcmd/exec-cmd.h>
27 #include "util/machine.h"
28 #include "util/path.h"
29 #include "util/session.h"
30 #include "util/thread.h"
31 #include <subcmd/parse-options.h>
32 #include "util/strlist.h"
33 #include "util/intlist.h"
34 #include "util/thread_map.h"
35 #include "util/stat.h"
36 #include "trace/beauty/beauty.h"
37 #include "trace-event.h"
38 #include "util/parse-events.h"
39 #include "util/bpf-loader.h"
40 #include "callchain.h"
41 #include "print_binary.h"
43 #include "syscalltbl.h"
44 #include "rb_resort.h"
48 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
53 #include <linux/err.h>
54 #include <linux/filter.h>
55 #include <linux/audit.h>
56 #include <linux/kernel.h>
57 #include <linux/random.h>
58 #include <linux/stringify.h>
59 #include <linux/time64.h>
61 #include "sane_ctype.h"
64 # define O_CLOEXEC 02000000
67 #ifndef F_LINUX_SPECIFIC_BASE
68 # define F_LINUX_SPECIFIC_BASE 1024
/*
 * NOTE(review): sparse excerpt of 'struct trace' — the per-run state of the
 * 'perf trace' tool; intermediate members are elided in this listing.
 */
72 struct perf_tool tool;
73 struct syscalltbl *sctbl;
/* id -> struct syscall lookup table, grown on demand (see trace__read_syscall_info) */
76 struct syscall *table;
78 struct perf_evsel *sys_enter,
82 struct record_opts opts;
83 struct perf_evlist *evlist;
85 struct thread *current;
88 unsigned long nr_events;
/* user-supplied list of syscall names/globs to trace (-e) */
89 struct strlist *ev_qualifier;
/* minimum duration, in ms, a syscall must take to be printed (see trace__filter_duration) */
98 double duration_filter;
104 unsigned int max_stack;
105 unsigned int min_stack;
/* true when the qualifier list is an exclusion list */
106 bool not_ev_qualifier;
110 bool multiple_threads;
114 bool show_tool_stats;
116 bool kernel_syscallchains;
/*
 * Accessors for one tracepoint field: either an integer loaded from the raw
 * sample payload or a pointer into it. Exactly one of the two is set up by
 * tp_field__init_uint()/tp_field__init_ptr().
 */
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generate tp_field__u{8,16,32,64}(): memcpy the value out of raw_data at
 * field->offset (memcpy avoids unaligned-access problems).
 */
131 #define TP_UINT_FIELD(bits) \
132 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/* Same, but byte-swapped for cross-endian perf.data files. */
144 #define TP_UINT_FIELD__SWAPPED(bits) \
145 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
149 return bswap_##bits(value);\
152 TP_UINT_FIELD__SWAPPED(16);
153 TP_UINT_FIELD__SWAPPED(32);
154 TP_UINT_FIELD__SWAPPED(64);
/*
 * Bind an integer accessor to 'field' based on the tracepoint format field's
 * size, picking the byte-swapping variant when the data file's endianness
 * differs from the host's.
 */
156 static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
160 field->offset = format_field->offset;
162 switch (format_field->size) {
164 field->integer = tp_field__u8;
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
182 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
184 return sample->raw_data + field->offset;
/* Bind the pointer accessor to 'field' at the format field's offset. */
187 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
/* Member of struct syscall_tp: accessors for the args/ret tracepoint fields. */
197 struct tp_field args, ret;
/*
 * Look up a named field in the evsel's tracepoint format and wire up an
 * integer accessor for it. Returns non-zero if the field doesn't exist.
 */
201 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
205 struct format_field *format_field = perf_evsel__field(evsel, name);
207 if (format_field == NULL)
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/* Convenience wrapper: initialize the syscall_tp member with the same name. */
213 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
214 ({ struct syscall_tp *sc = evsel->priv;\
215 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/* Same as above but for pointer-valued tracepoint fields. */
217 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
218 struct tp_field *field,
221 struct format_field *format_field = perf_evsel__field(evsel, name);
223 if (format_field == NULL)
226 return tp_field__init_ptr(field, format_field);
229 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
230 ({ struct syscall_tp *sc = evsel->priv;\
231 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/* Free the evsel's private syscall_tp state, then the evsel itself. */
233 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
236 perf_evsel__delete(evsel);
/*
 * Allocate per-evsel syscall_tp state, bind the 'id' field accessor and
 * install the sample handler. NOTE(review): the malloc result is checked
 * before use; the failure path is elided in this listing.
 */
239 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
246 evsel->handler = handler;
/*
 * Create the raw_syscalls:sys_{enter,exit} tracepoint evsel ('direction' is
 * "sys_enter" or "sys_exit"), falling back to the older "syscalls" subsystem
 * name, and attach the given handler. On init failure the evsel and its priv
 * are released via perf_evsel__delete_priv().
 */
257 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
259 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
261 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
263 evsel = perf_evsel__newtp("syscalls", direction);
268 if (perf_evsel__init_syscall_tp(evsel, handler))
274 perf_evsel__delete_priv(evsel);
/* Fetch the named tracepoint field from a sample as an integer ... */
278 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
279 ({ struct syscall_tp *fields = evsel->priv; \
280 fields->name.integer(&fields->name, sample); })
/* ... or as a pointer into the raw payload. */
282 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
283 ({ struct syscall_tp *fields = evsel->priv; \
284 fields->name.pointer(&fields->name, sample); })
/*
 * Format 'val' as its symbolic name from 'sa' when it falls inside the
 * array's [offset, offset + nr_entries) range, otherwise fall back to the
 * numeric format 'intfmt'.
 */
286 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
288 int idx = val - sa->offset;
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
293 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* syscall-arg adapter: the strarray comes in via arg->parm. */
296 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 struct syscall_arg *arg)
300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
/* Default numeric fallback is plain decimal. */
303 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
304 struct syscall_arg *arg)
306 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
309 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* struct strarrays member: a set of strarray tables tried in order. */
313 struct strarray **entries;
316 #define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
317 .nr_entries = ARRAY_SIZE(array), \
/*
 * Like syscall_arg__scnprintf_strarray() but searches several strarrays
 * (e.g. plain fcntl cmds + the F_LINUX_SPECIFIC_BASE-offset ones); prints
 * the value in decimal when no table matches.
 */
321 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
324 struct strarrays *sas = arg->parm;
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
331 if (idx >= 0 && idx < sa->nr_entries) {
/* hole in a designated-initializer table: keep looking */
332 if (sa->entries[idx] == NULL)
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
338 return scnprintf(bf, size, "%d", arg->val);
/* Fallback definition for old libcs that don't expose AT_FDCWD. */
342 #define AT_FDCWD -100
/*
 * Print *at() dirfd arguments: the special AT_FDCWD value becomes "CWD",
 * anything else is formatted as a regular fd (number plus resolved path).
 */
345 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
346 struct syscall_arg *arg)
351 return scnprintf(bf, size, "CWD");
353 return syscall_arg__scnprintf_fd(bf, size, arg);
356 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward decl: defined after thread_trace, needs ttrace->paths. */
358 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
359 struct syscall_arg *arg);
361 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Trivial scalar formatters shared with the trace/beauty/ helpers. */
363 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
365 return scnprintf(bf, size, "%#lx", arg->val);
/* NOTE(review): prints arg->val with "%d", i.e. deliberately as a 32-bit int. */
368 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
370 return scnprintf(bf, size, "%d", arg->val);
373 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
375 return scnprintf(bf, size, "%ld", arg->val);
/*
 * Symbolic-name tables for enum-like syscall arguments, wrapped into
 * strarrays by DEFINE_STRARRAY[_OFFSET](). Entry order mirrors the kernel
 * UAPI constant values; _OFFSET variants start at a non-zero base.
 */
378 static const char *bpf_cmd[] = {
379 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
380 "MAP_GET_NEXT_KEY", "PROG_LOAD",
382 static DEFINE_STRARRAY(bpf_cmd);
/* EPOLL_CTL_* start at 1, hence the offset. */
384 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
385 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
387 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
388 static DEFINE_STRARRAY(itimers);
390 static const char *keyctl_options[] = {
391 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
392 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
393 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
394 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
395 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
397 static DEFINE_STRARRAY(keyctl_options);
399 static const char *whences[] = { "SET", "CUR", "END",
407 static DEFINE_STRARRAY(whences);
409 static const char *fcntl_cmds[] = {
410 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
411 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
412 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
415 static DEFINE_STRARRAY(fcntl_cmds);
/* Linux-private fcntl cmds live at F_LINUX_SPECIFIC_BASE (1024). */
417 static const char *fcntl_linux_specific_cmds[] = {
418 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
419 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
420 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
423 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
/* Both fcntl tables, searched in order by syscall_arg__scnprintf_strarrays(). */
425 static struct strarray *fcntl_cmds_arrays[] = {
426 &strarray__fcntl_cmds,
427 &strarray__fcntl_linux_specific_cmds,
430 static DEFINE_STRARRAYS(fcntl_cmds_arrays);
432 static const char *rlimit_resources[] = {
433 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
434 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
437 static DEFINE_STRARRAY(rlimit_resources);
439 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
440 static DEFINE_STRARRAY(sighow);
442 static const char *clockid[] = {
443 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
444 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
445 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
447 static DEFINE_STRARRAY(clockid);
449 static const char *socket_families[] = {
450 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
451 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
452 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
453 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
454 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
455 "ALG", "NFC", "VSOCK",
457 static DEFINE_STRARRAY(socket_families);
/*
 * access(2) mode printer: F_OK alone prints "F"; otherwise each of
 * R/W/X_OK contributes its letter, with any leftover bits shown in hex.
 */
459 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
460 struct syscall_arg *arg)
465 if (mode == F_OK) /* 0 */
466 return scnprintf(bf, size, "F");
468 if (mode & n##_OK) { \
469 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
/* any bits not covered by the macro expansion above */
479 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
484 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Forward decl: needs thread_trace/vfs_getname machinery defined later. */
486 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
487 struct syscall_arg *arg);
489 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * pipe2(2) flags printer: emit known O_* flag names joined by '|', then any
 * residue in hex.
 */
491 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
492 struct syscall_arg *arg)
494 int printed = 0, flags = arg->val;
497 if (flags & O_##n) { \
498 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
507 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
512 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/* Fallbacks for pre-getrandom(2) system headers. */
514 #ifndef GRND_NONBLOCK
515 #define GRND_NONBLOCK 0x0001
518 #define GRND_RANDOM 0x0002
/* getrandom(2) flags printer, same '|'-joined pattern as pipe flags. */
521 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
522 struct syscall_arg *arg)
524 int printed = 0, flags = arg->val;
527 if (flags & GRND_##n) { \
528 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
529 flags &= ~GRND_##n; \
537 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
542 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/* Initializer shorthand used in the syscall_fmts[] table below. */
544 #define STRARRAY(name, array) \
545 { .scnprintf = SCA_STRARRAY, \
546 .parm = &strarray__##array, }
548 #include "trace/beauty/eventfd.c"
549 #include "trace/beauty/flock.c"
550 #include "trace/beauty/futex_op.c"
551 #include "trace/beauty/mmap.c"
552 #include "trace/beauty/mode_t.c"
553 #include "trace/beauty/msg_flags.c"
554 #include "trace/beauty/open_flags.c"
555 #include "trace/beauty/perf_event_open.c"
556 #include "trace/beauty/pid.c"
557 #include "trace/beauty/sched_policy.c"
558 #include "trace/beauty/seccomp.c"
559 #include "trace/beauty/signum.c"
560 #include "trace/beauty/socket_type.c"
561 #include "trace/beauty/waitid_options.c"
/* Per-argument formatter: how to pretty print one syscall argument. */
563 struct syscall_arg_fmt {
564 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Per-syscall formatting overrides: aliases, special return handling
 * (hexret/errpid/timeout) and per-argument pretty printers. The table MUST
 * stay sorted by .name — syscall_fmt__find() bsearch()es it.
 * NOTE(review): sparse excerpt; many entry header lines are elided here.
 */
570 static struct syscall_fmt {
573 struct syscall_arg_fmt arg[6];
580 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
581 { .name = "arch_prctl", .alias = "prctl", },
583 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
584 { .name = "brk", .hexret = true,
585 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
586 { .name = "clock_gettime",
587 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
588 { .name = "clone", .errpid = true, .nr_args = 5,
589 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
590 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
591 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
592 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
593 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
595 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
596 { .name = "epoll_ctl",
597 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
598 { .name = "eventfd2",
599 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
600 { .name = "fchmodat",
601 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
602 { .name = "fchownat",
603 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
605 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
606 .parm = &strarrays__fcntl_cmds_arrays,
607 .show_zero = true, },
608 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
610 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
611 { .name = "fstat", .alias = "newfstat", },
612 { .name = "fstatat", .alias = "newfstatat", },
614 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
615 { .name = "futimesat",
616 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
617 { .name = "getitimer",
618 .arg = { [0] = STRARRAY(which, itimers), }, },
619 { .name = "getpid", .errpid = true, },
620 { .name = "getpgid", .errpid = true, },
621 { .name = "getppid", .errpid = true, },
622 { .name = "getrandom",
623 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
624 { .name = "getrlimit",
625 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
628 #if defined(__i386__) || defined(__x86_64__)
630 * FIXME: Make this available to all arches.
632 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
633 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
635 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
638 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
640 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
642 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
644 .arg = { [2] = STRARRAY(whence, whences), }, },
645 { .name = "lstat", .alias = "newlstat", },
647 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
648 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
650 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
652 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
654 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
655 { .name = "mlockall",
656 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
657 { .name = "mmap", .hexret = true,
658 /* The standard mmap maps to old_mmap on s390x */
659 #if defined(__s390x__)
662 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
663 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
664 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
665 { .name = "mprotect",
666 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
667 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
668 { .name = "mq_unlink",
669 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
670 { .name = "mremap", .hexret = true,
671 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
672 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
673 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
675 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
677 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
678 { .name = "name_to_handle_at",
679 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
680 { .name = "newfstatat",
681 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
683 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
684 { .name = "open_by_handle_at",
685 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
686 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
688 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
689 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
690 { .name = "perf_event_open",
691 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
692 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
693 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
695 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
696 { .name = "pkey_alloc",
697 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
698 { .name = "pkey_free",
699 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
700 { .name = "pkey_mprotect",
701 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
702 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
703 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
704 { .name = "poll", .timeout = true, },
705 { .name = "ppoll", .timeout = true, },
706 { .name = "pread", .alias = "pread64", },
707 { .name = "preadv", .alias = "pread", },
708 { .name = "prlimit64",
709 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
710 { .name = "pwrite", .alias = "pwrite64", },
711 { .name = "readlinkat",
712 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
713 { .name = "recvfrom",
714 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
715 { .name = "recvmmsg",
716 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
718 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
719 { .name = "renameat",
720 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
721 { .name = "rt_sigaction",
722 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
723 { .name = "rt_sigprocmask",
724 .arg = { [0] = STRARRAY(how, sighow), }, },
725 { .name = "rt_sigqueueinfo",
726 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
727 { .name = "rt_tgsigqueueinfo",
728 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
729 { .name = "sched_setscheduler",
730 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
732 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
733 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
734 { .name = "select", .timeout = true, },
735 { .name = "sendmmsg",
736 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
738 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
740 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
741 { .name = "set_tid_address", .errpid = true, },
742 { .name = "setitimer",
743 .arg = { [0] = STRARRAY(which, itimers), }, },
744 { .name = "setrlimit",
745 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
747 .arg = { [0] = STRARRAY(family, socket_families),
748 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
749 { .name = "socketpair",
750 .arg = { [0] = STRARRAY(family, socket_families),
751 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
752 { .name = "stat", .alias = "newstat", },
754 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
755 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
756 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
758 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
760 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
761 { .name = "symlinkat",
762 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
764 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
766 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
767 { .name = "uname", .alias = "newuname", },
768 { .name = "unlinkat",
769 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
770 { .name = "utimensat",
771 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
772 { .name = "wait4", .errpid = true,
773 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
774 { .name = "waitid", .errpid = true,
775 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
778 static int syscall_fmt__cmp(const void *name, const void *fmtp)
780 const struct syscall_fmt *fmt = fmtp;
781 return strcmp(name, fmt->name);
784 static struct syscall_fmt *syscall_fmt__find(const char *name)
786 const int nmemb = ARRAY_SIZE(syscall_fmts);
787 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* struct syscall members (sparse excerpt): one entry in trace->syscalls.table. */
791 struct event_format *tp_format;
/* linked list of tracepoint format fields, minus the leading syscall-nr field */
793 struct format_field *args;
796 struct syscall_fmt *fmt;
797 struct syscall_arg_fmt *arg_fmt;
801 * We need to have this 'calculated' boolean because in some cases we really
802 * don't know what is the duration of a syscall, for instance, when we start
803 * a session and some threads are waiting for a syscall to finish, say 'poll',
804 * in which case all we can do is to print "( ? ) for duration and for the
/*
 * Print "(  N.NNN ms): " with color keyed to magnitude (red >= 1ms,
 * yellow >= 0.01ms), or "( ? ): " when the duration could not be computed.
 * Returns the number of characters written.
 */
807 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
809 double duration = (double)t / NSEC_PER_MSEC;
810 size_t printed = fprintf(fp, "(");
813 printed += fprintf(fp, " ? ");
814 else if (duration >= 1.0)
815 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
816 else if (duration >= 0.01)
817 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
819 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
820 return printed + fprintf(fp, "): ");
824 * filename.ptr: The filename char pointer that will be vfs_getname'd
825 * filename.entry_str_pos: Where to insert the string translated from
826 * filename.ptr by the vfs_getname tracepoint/kprobe.
827 * ret_scnprintf: syscall args may set this to a different syscall return
828 * formatter, for instance, fcntl may return fds, file flags, etc.
/* Per-thread tracing state, hung off thread->priv (sparse excerpt). */
830 struct thread_trace {
833 unsigned long nr_events;
/* major/minor page fault counters */
834 unsigned long pfmaj, pfmin;
/* optional override for formatting this thread's current syscall return */
837 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/* where in entry_str the vfs_getname result should be spliced in */
840 short int entry_str_pos;
842 unsigned int namelen;
850 struct intlist *syscall_stats;
/*
 * Allocate zeroed per-thread state; paths.max = -1 marks the fd->path
 * table as empty (see trace__set_fd_pathname).
 */
853 static struct thread_trace *thread_trace__new(void)
855 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
858 ttrace->paths.max = -1;
860 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get (lazily creating) the thread's thread_trace. On allocation failure a
 * warning is printed to 'fp' and samples for the thread are dropped.
 */
865 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
867 struct thread_trace *ttrace;
872 if (thread__priv(thread) == NULL)
873 thread__set_priv(thread, thread_trace__new());
/* still NULL: thread_trace__new() failed */
875 if (thread__priv(thread) == NULL)
878 ttrace = thread__priv(thread);
883 color_fprintf(fp, PERF_COLOR_RED,
884 "WARNING: not enough memory, dropping samples!\n");
/*
 * Let an argument formatter (e.g. fcntl's) install a custom formatter for
 * this thread's pending syscall return value.
 */
889 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
890 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
892 struct thread_trace *ttrace = thread__priv(arg->thread);
894 ttrace->ret_scnprintf = ret_scnprintf;
/* page-fault event selection bits */
897 #define TRACE_PFMAJ (1 << 0)
898 #define TRACE_PFMIN (1 << 1)
/* size of the per-thread buffer holding the formatted sys_enter line */
900 static const size_t trace__entry_str_size = 2048;
/*
 * Remember 'pathname' for 'fd' in the thread's fd->path table, growing the
 * table (and zero-filling the new slots) when fd exceeds the current max.
 * Returns 0 on success, -1 on allocation failure.
 */
902 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
904 struct thread_trace *ttrace = thread__priv(thread);
906 if (fd > ttrace->paths.max) {
907 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
912 if (ttrace->paths.max != -1) {
/* zero only the newly added tail of the table */
913 memset(npath + ttrace->paths.max + 1, 0,
914 (fd - ttrace->paths.max) * sizeof(char *));
916 memset(npath, 0, (fd + 1) * sizeof(char *));
919 ttrace->paths.table = npath;
920 ttrace->paths.max = fd;
923 ttrace->paths.table[fd] = strdup(pathname);
925 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve 'fd' to a filesystem path by readlink()ing the thread's
 * /proc/<pid>[/task/<tid>]/fd/<fd> symlink, then cache it via
 * trace__set_fd_pathname().
 */
928 static int thread__read_fd_path(struct thread *thread, int fd)
930 char linkname[PATH_MAX], pathname[PATH_MAX];
/* main thread: the shorter /proc/<pid>/fd form suffices */
934 if (thread->pid_ == thread->tid) {
935 scnprintf(linkname, sizeof(linkname),
936 "/proc/%d/fd/%d", thread->pid_, fd);
938 scnprintf(linkname, sizeof(linkname),
939 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* st.st_size bounds the link target length; bail if it won't fit */
942 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
945 ret = readlink(linkname, pathname, sizeof(pathname));
947 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate */
950 pathname[ret] = '\0';
951 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for 'fd', reading it from /proc on a cache miss
 * (counted in trace->stats.proc_getname). NULL when it can't be resolved.
 */
954 static const char *thread__fd_path(struct thread *thread, int fd,
957 struct thread_trace *ttrace = thread__priv(thread);
965 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
968 ++trace->stats.proc_getname;
969 if (thread__read_fd_path(thread, fd))
973 return ttrace->paths.table[fd];
/* Print an fd as "N<path>" when the path can be resolved, plain "N" otherwise. */
976 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
979 size_t printed = scnprintf(bf, size, "%d", fd);
980 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
983 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * close(2) fd printer: format like a normal fd, then drop the cached path
 * entry since the fd number may be reused for a different file.
 */
988 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
989 struct syscall_arg *arg)
992 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
993 struct thread_trace *ttrace = thread__priv(arg->thread);
995 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
996 zfree(&ttrace->paths.table[fd]);
/*
 * Record the filename pointer and the position inside entry_str where the
 * string produced by the vfs_getname probe should later be spliced in.
 */
1001 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1004 struct thread_trace *ttrace = thread__priv(thread);
1006 ttrace->filename.ptr = ptr;
1007 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1010 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1011 struct syscall_arg *arg)
1013 unsigned long ptr = arg->val;
1015 if (!arg->trace->vfs_getname)
1016 return scnprintf(bf, size, "%#x", ptr);
1018 thread__set_filename_pos(arg->thread, bf, ptr);
1022 static bool trace__filter_duration(struct trace *trace, double t)
1024 return t < (trace->duration_filter * NSEC_PER_MSEC);
1027 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1029 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1031 return fprintf(fp, "%10.3f ", ts);
1035 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1036 * using ttrace->entry_time for a thread that receives a sys_exit without
1037 * first having received a sys_enter ("poll" issued before tracing session
1038 * starts, lost sys_enter exit due to ring buffer overflow).
/* Print the timestamp, or " ? " for the undefined tstamp==0 case (see above). */
1040 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1043 return __trace__fprintf_tstamp(trace, tstamp, fp);
1045 return fprintf(fp, " ? ");
static bool done = false;
static bool interrupted = false;

/*
 * Signal handler: ask the main loop to stop; 'interrupted' records whether
 * the stop was user-initiated (SIGINT) rather than e.g. workload exit.
 */
static void sig_handler(int sig)
{
	interrupted = (sig == SIGINT);
	done = true;
}
/*
 * Print the common line prefix: timestamp, duration and — when tracing more
 * than one thread — the comm/tid of the thread that issued the syscall.
 */
1057 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1058 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1060 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1061 printed += fprintf_duration(duration, duration_calculated, fp);
1063 if (trace->multiple_threads) {
1064 if (trace->show_comm)
1065 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1066 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Dispatch a non-sample event to the machine: LOST events additionally get
 * a red warning on the trace output before being accounted.
 */
1072 static int trace__process_event(struct trace *trace, struct machine *machine,
1073 union perf_event *event, struct perf_sample *sample)
1077 switch (event->header.type) {
1078 case PERF_RECORD_LOST:
1079 color_fprintf(trace->output, PERF_COLOR_RED,
1080 "LOST %" PRIu64 " events!\n", event->lost.lost);
1081 ret = machine__process_lost_event(machine, event, sample);
1084 ret = machine__process_event(machine, event, sample);
1091 static int trace__tool_process(struct perf_tool *tool,
1092 union perf_event *event,
1093 struct perf_sample *sample,
1094 struct machine *machine)
1096 struct trace *trace = container_of(tool, struct trace, tool);
1097 return trace__process_event(trace, machine, event, sample);
/*
 * Kernel-address resolver that first warns (once per machine) when
 * kptr_restrict hides kernel symbols, then delegates to the regular
 * machine__resolve_kernel_addr().
 */
1100 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1102 struct machine *machine = vmachine;
1104 if (machine->kptr_restrict_warned)
1107 if (symbol_conf.kptr_restrict) {
1108 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1109 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1110 "Kernel samples will not be resolved.\n");
1111 machine->kptr_restrict_warned = true;
1115 return machine__resolve_kernel_addr(vmachine, addrp, modp);
/*
 * Set up symbol resolution: init the symbol subsystem, create the host
 * machine, register the kptr_restrict-aware resolver and synthesize
 * existing threads so their maps are known before tracing starts.
 */
1118 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1120 int err = symbol__init(NULL);
1125 trace->host = machine__new_host();
1126 if (trace->host == NULL)
1129 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1132 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1133 evlist->threads, trace__tool_process, false,
1134 trace->opts.proc_map_timeout);
/* Tear down the host machine created by trace__symbols_init(). */
1141 static void trace__symbols__exit(struct trace *trace)
1143 machine__exit(trace->host);
/*
 * Allocate sc->arg_fmt with one slot per argument, seeding each slot from
 * the syscall's static fmt entry when one exists. A fmt-provided nr_args
 * overrides the default of 6 (used when the tracepoint format is missing).
 */
1149 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1153 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1154 nr_args = sc->fmt->nr_args;
1156 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1157 if (sc->arg_fmt == NULL)
1160 for (idx = 0; idx < nr_args; ++idx) {
1162 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1165 sc->nr_args = nr_args;
/*
 * Infer a pretty printer for each argument that the static fmt table didn't
 * already cover, based on the tracepoint field's type and name: filenames,
 * raw pointers (hex), pids, modes, and <int-ish>*fd-named fields.
 */
1169 static int syscall__set_arg_fmts(struct syscall *sc)
1171 struct format_field *field;
1174 for (field = sc->args; field; field = field->next, ++idx) {
/* explicit entry in syscall_fmts[] wins */
1175 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1178 if (strcmp(field->type, "const char *") == 0 &&
1179 (strcmp(field->name, "filename") == 0 ||
1180 strcmp(field->name, "path") == 0 ||
1181 strcmp(field->name, "pathname") == 0))
1182 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1183 else if (field->flags & FIELD_IS_POINTER)
1184 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
1185 else if (strcmp(field->type, "pid_t") == 0)
1186 sc->arg_fmt[idx].scnprintf = SCA_PID;
1187 else if (strcmp(field->type, "umode_t") == 0)
1188 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1189 else if ((strcmp(field->type, "int") == 0 ||
1190 strcmp(field->type, "unsigned int") == 0 ||
1191 strcmp(field->type, "long") == 0) &&
1192 (len = strlen(field->name)) >= 2 &&
1193 strcmp(field->name + len - 2, "fd") == 0) {
1195 * /sys/kernel/tracing/events/syscalls/sys_enter*
1196 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1201 sc->arg_fmt[idx].scnprintf = SCA_FD;
/*
 * Lazily populate trace->syscalls.table[id]: grow the table (zeroing new
 * slots), look up the name and static fmt entry, load the tracepoint
 * format (trying the fmt alias when the primary name has no event), and
 * set up per-argument formatters.
 */
1208 static int trace__read_syscall_info(struct trace *trace, int id)
1212 const char *name = syscalltbl__name(trace->sctbl, id);
1217 if (id > trace->syscalls.max) {
1218 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1220 if (nsyscalls == NULL)
/* zero only the slots added by this grow */
1223 if (trace->syscalls.max != -1) {
1224 memset(nsyscalls + trace->syscalls.max + 1, 0,
1225 (id - trace->syscalls.max) * sizeof(*sc));
1227 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1230 trace->syscalls.table = nsyscalls;
1231 trace->syscalls.max = id;
1234 sc = trace->syscalls.table + id;
1237 sc->fmt = syscall_fmt__find(sc->name);
1239 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1240 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* e.g. "fstat" has no event of its own, but its alias "newfstat" does */
1242 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1243 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1244 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1247 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1250 if (IS_ERR(sc->tp_format))
1253 sc->args = sc->tp_format->format.fields;
1255 * We need to check and discard the first variable '__syscall_nr'
1256 * or 'nr' that mean the syscall number. It is needless here.
1257 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1259 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1260 sc->args = sc->args->next;
1264 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1266 return syscall__set_arg_fmts(sc);
1269 static int trace__validate_ev_qualifier(struct trace *trace)
/*
 * Translate the user-supplied -e/--expr syscall name list (which may
 * contain globs) into an array of syscall ids in
 * trace->ev_qualifier_ids, reporting any names that match nothing.
 * Returns 0 on success; on error the ids array is freed and an
 * explanatory hint is printed to trace->output.
 */
1272 size_t nr_allocated;
1273 struct str_node *pos;
1275 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
/* One id slot per entry to start with; glob matches may need more (realloc below). */
1276 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1277 sizeof(trace->ev_qualifier_ids.entries[0]));
1279 if (trace->ev_qualifier_ids.entries == NULL) {
1280 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1286 nr_allocated = trace->ev_qualifier_ids.nr;
1289 strlist__for_each_entry(pos, trace->ev_qualifier) {
1290 const char *sc = pos->s;
1291 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
/* Not an exact name: try it as a glob against the syscall table. */
1294 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1299 fputs("Error:\tInvalid syscall ", trace->output);
1302 fputs(", ", trace->output);
1305 fputs(sc, trace->output);
1308 trace->ev_qualifier_ids.entries[i++] = id;
1309 if (match_next == -1)
/* Collect every additional syscall the glob matches. */
1313 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
/* Extra glob matches exceed the initial allocation: grow the array. */
1316 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1320 entries = realloc(trace->ev_qualifier_ids.entries,
1321 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1322 if (entries == NULL) {
1324 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1327 trace->ev_qualifier_ids.entries = entries;
1329 trace->ev_qualifier_ids.nr++;
1330 trace->ev_qualifier_ids.entries[i++] = id;
1335 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1336 "\nHint:\tand: 'man syscalls'\n", trace->output);
/* Error path: discard the partially built id list. */
1338 zfree(&trace->ev_qualifier_ids.entries);
1339 trace->ev_qualifier_ids.nr = 0;
1346 * args is to be interpreted as a series of longs but we need to handle
1347 * 8-byte unaligned accesses. args points to raw_data within the event
1348 * and raw_data is guaranteed to be 8-byte unaligned because it is
1349 * preceded by raw_size which is a u32. So we need to copy args to a temp
1350 * variable to read it. Most notably this avoids extended load instructions
1351 * on unaligned addresses
/*
 * Fetch the idx-th raw syscall argument as an unsigned long.  memcpy()
 * (instead of a direct pointer dereference) keeps the access safe on
 * architectures that fault or slow down on unaligned loads.
 */
1353 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1356 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1358 memcpy(&val, p, sizeof(val));
/*
 * Print the label for the current argument: its tracepoint field name
 * when known, otherwise a positional "argN" fallback.
 */
1362 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1363 struct syscall_arg *arg)
1365 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1366 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1368 return scnprintf(bf, size, "arg%d: ", arg->idx);
/*
 * Print one argument value: delegate to the per-arg beautifier when one
 * is registered (passing along its optional 'parm' cookie), otherwise
 * fall back to a plain signed decimal.
 */
1371 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1372 struct syscall_arg *arg, unsigned long val)
1374 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1376 if (sc->arg_fmt[arg->idx].parm)
1377 arg->parm = sc->arg_fmt[arg->idx].parm;
1378 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1380 return scnprintf(bf, size, "%ld", val);
/*
 * Format the full argument list of a sys_enter record into 'bf'.
 * Uses the tracepoint field descriptions when available; when the
 * /format file could not be read, falls back to dumping the raw
 * argument slots.  Returns the number of characters written.
 */
1383 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1384 unsigned char *args, struct trace *trace,
1385 struct thread *thread)
1390 struct syscall_arg arg = {
1397 struct thread_trace *ttrace = thread__priv(thread);
1400 * Things like fcntl will set this in its 'cmd' formatter to pick the
1401 * right formatter for the return value (an fd? file flags?), which is
1402 * not needed for syscalls that always return a given type, say an fd.
1404 ttrace->ret_scnprintf = NULL;
1406 if (sc->args != NULL) {
1407 struct format_field *field;
/* 'bit' tracks this arg's position in the suppression mask (shifted each iteration). */
1409 for (field = sc->args; field;
1410 field = field->next, ++arg.idx, bit <<= 1) {
1414 val = syscall_arg__val(&arg, arg.idx);
1417 * Suppress this argument if its value is zero and
1418 * and we don't have a string associated in an
/* ...unless the formatter explicitly asked to show zeros (show_zero / strarray). */
1423 (sc->arg_fmt[arg.idx].show_zero ||
1424 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
1425 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1426 sc->arg_fmt[arg.idx].parm))
1429 printed += scnprintf(bf + printed, size - printed,
1430 "%s%s: ", printed ? ", " : "", field->name);
1431 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1433 } else if (IS_ERR(sc->tp_format)) {
1435 * If we managed to read the tracepoint /format file, then we
1436 * may end up not having any args, like with gettid(), so only
1437 * print the raw args when we didn't manage to read it.
1439 while (arg.idx < sc->nr_args) {
1442 val = syscall_arg__val(&arg, arg.idx);
1444 printed += scnprintf(bf + printed, size - printed, ", ");
1445 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
1446 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
/* Signature for per-tracepoint sample handlers (stored in evsel->handler). */
1456 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1457 union perf_event *event,
1458 struct perf_sample *sample);
/*
 * Look up (and lazily load) the syscall descriptor for 'id'.
 * Returns NULL for invalid ids or when the info cannot be read, after
 * printing a diagnostic to trace->output.
 */
1460 static struct syscall *trace__syscall_info(struct trace *trace,
1461 struct perf_evsel *evsel, int id)
1467 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1468 * before that, leaving at a higher verbosity level till that is
1469 * explained. Reproduced with plain ftrace with:
1471 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1472 * grep "NR -1 " /t/trace_pipe
1474 * After generating some load on the machine.
1478 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1479 id, perf_evsel__name(evsel), ++n);
/* Not yet in the table (or never filled in): load it on demand. */
1484 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1485 trace__read_syscall_info(trace, id))
1488 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1491 return &trace->syscalls.table[id];
/* Error path: report which syscall we failed to read info for. */
1495 fprintf(trace->output, "Problems reading syscall %d", id);
1496 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1497 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1498 fputs(" information\n", trace->output);
/*
 * Accumulate per-thread, per-syscall duration statistics (used by the
 * --summary output).  Allocates the stats node for 'id' on first use.
 */
1503 static void thread__update_stats(struct thread_trace *ttrace,
1504 int id, struct perf_sample *sample)
1506 struct int_node *inode;
1507 struct stats *stats;
1510 inode = intlist__findnew(ttrace->syscall_stats, id);
1514 stats = inode->priv;
1515 if (stats == NULL) {
1516 stats = malloc(sizeof(struct stats));
1520 inode->priv = stats;
/* Duration = exit time - enter time; guard against missing/bogus entry_time. */
1523 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1524 duration = sample->time - ttrace->entry_time;
1526 update_stats(stats, duration);
/*
 * When a new event arrives while the previous thread's sys_enter line
 * is still pending (its sys_exit hasn't been seen), flush that pending
 * entry with a trailing " ...\n" so output lines don't interleave.
 */
1529 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1531 struct thread_trace *ttrace;
1535 if (trace->current == NULL)
1538 ttrace = thread__priv(trace->current);
1540 if (!ttrace->entry_pending)
1543 duration = sample->time - ttrace->entry_time;
1545 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1546 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1547 ttrace->entry_pending = false;
/*
 * raw_syscalls:sys_enter handler: format the syscall name + args into
 * the thread's entry_str.  The line is either printed immediately
 * (exit-like syscalls / no filtering) or kept pending until the
 * matching sys_exit arrives so the duration can be printed with it.
 */
1552 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1553 union perf_event *event __maybe_unused,
1554 struct perf_sample *sample)
1559 struct thread *thread;
1560 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1561 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1562 struct thread_trace *ttrace;
1567 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1568 ttrace = thread__trace(thread, trace->output);
1572 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* Lazily allocate the per-thread line buffer. */
1574 if (ttrace->entry_str == NULL) {
1575 ttrace->entry_str = malloc(trace__entry_str_size);
1576 if (!ttrace->entry_str)
/* Only flush a previous pending entry when we'd actually print this one now. */
1580 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1581 trace__printf_interrupted_entry(trace, sample);
1583 ttrace->entry_time = sample->time;
1584 msg = ttrace->entry_str;
1585 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1587 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1588 args, trace, thread);
1591 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1592 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1593 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1596 ttrace->entry_pending = true;
1597 /* See trace__vfs_getname & trace__sys_exit */
1598 ttrace->filename.pending_open = false;
/* Track the most recently seen thread so interrupted entries can be flushed. */
1601 if (trace->current != thread) {
1602 thread__put(trace->current);
1603 trace->current = thread__get(thread);
1607 thread__put(thread);
/*
 * Resolve the sample's callchain (symbols etc.) into 'cursor', capped
 * at trace->max_stack entries.  Returns non-zero on resolution failure.
 */
1611 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1612 struct perf_sample *sample,
1613 struct callchain_cursor *cursor)
1615 struct addr_location al;
1617 if (machine__resolve(trace->host, &al, sample) < 0 ||
1618 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
/* Print the previously resolved callchain, indented 38 columns, to trace->output. */
1624 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1626 /* TODO: user-configurable print_opts */
1627 const unsigned int print_opts = EVSEL__PRINT_SYM |
1629 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1631 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
/*
 * raw_syscalls:sys_exit handler: pair this exit with the thread's
 * pending sys_enter, compute the duration, update summary stats, and
 * print the return value using the most specific formatter available
 * (custom ret_scnprintf, errno name, timeout, hex, child pid, or plain
 * decimal).  Optionally appends the resolved callchain.
 */
1634 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1635 union perf_event *event __maybe_unused,
1636 struct perf_sample *sample)
1640 bool duration_calculated = false;
1641 struct thread *thread;
1642 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1643 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1644 struct thread_trace *ttrace;
1649 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1650 ttrace = thread__trace(thread, trace->output);
1655 thread__update_stats(ttrace, id, sample);
1657 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open() completes a pending vfs_getname: remember fd -> pathname. */
1659 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1660 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1661 ttrace->filename.pending_open = false;
1662 ++trace->stats.vfs_getname;
1665 if (ttrace->entry_time) {
1666 duration = sample->time - ttrace->entry_time;
1667 if (trace__filter_duration(trace, duration))
1669 duration_calculated = true;
1670 } else if (trace->duration_filter)
1673 if (sample->callchain) {
1674 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1675 if (callchain_ret == 0) {
/* Stack too shallow for --min-stack: skip printing this event. */
1676 if (callchain_cursor.nr < trace->min_stack)
1682 if (trace->summary_only)
1685 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1687 if (ttrace->entry_pending) {
1688 fprintf(trace->output, "%-70s", ttrace->entry_str);
/* The entry was flushed earlier as "...": mark this line as its continuation. */
1690 fprintf(trace->output, " ... [");
1691 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1692 fprintf(trace->output, "]: %s()", sc->name);
1695 if (sc->fmt == NULL) {
1699 fprintf(trace->output, ") = %ld", ret);
1700 } else if (ret < 0) {
/* Negative return: print as -1 with the errno's symbolic name and message. */
1702 char bf[STRERR_BUFSIZE];
1703 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1704 *e = audit_errno_to_name(-ret);
1706 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1708 } else if (ret == 0 && sc->fmt->timeout)
1709 fprintf(trace->output, ") = 0 Timeout");
1710 else if (ttrace->ret_scnprintf) {
/* A formatter set during sys_enter (e.g. fcntl's 'cmd') renders the ret value. */
1712 struct syscall_arg arg = {
1717 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
1718 ttrace->ret_scnprintf = NULL;
1719 fprintf(trace->output, ") = %s", bf);
1720 } else if (sc->fmt->hexret)
1721 fprintf(trace->output, ") = %#lx", ret);
1722 else if (sc->fmt->errpid) {
/* Return value is a pid (fork/clone/wait*): show the child's comm too. */
1723 struct thread *child = machine__find_thread(trace->host, ret, ret);
1725 if (child != NULL) {
1726 fprintf(trace->output, ") = %ld", ret);
1727 if (child->comm_set)
1728 fprintf(trace->output, " (%s)", thread__comm_str(child));
1734 fputc('\n', trace->output);
1736 if (callchain_ret > 0)
1737 trace__fprintf_callchain(trace, sample);
1738 else if (callchain_ret < 0)
1739 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1741 ttrace->entry_pending = false;
1744 thread__put(thread);
/*
 * probe:vfs_getname handler: capture the pathname being resolved by
 * the kernel for the thread's in-flight syscall.  The name is saved
 * for fd bookkeeping (see trace__sys_exit) and, when the pending
 * entry_str left a placeholder, spliced into it in place.
 */
1748 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1749 union perf_event *event __maybe_unused,
1750 struct perf_sample *sample)
1752 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1753 struct thread_trace *ttrace;
1754 size_t filename_len, entry_str_len, to_move;
1755 ssize_t remaining_space;
1757 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1762 ttrace = thread__priv(thread);
1766 filename_len = strlen(filename);
1767 if (filename_len == 0)
/* Grow the per-thread filename buffer as needed (never shrinks). */
1770 if (ttrace->filename.namelen < filename_len) {
1771 char *f = realloc(ttrace->filename.name, filename_len + 1);
1776 ttrace->filename.namelen = filename_len;
1777 ttrace->filename.name = f;
1780 strcpy(ttrace->filename.name, filename);
1781 ttrace->filename.pending_open = true;
/* No placeholder recorded in entry_str: nothing to splice, we're done. */
1783 if (!ttrace->filename.ptr)
1786 entry_str_len = strlen(ttrace->entry_str);
1787 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1788 if (remaining_space <= 0)
/* Buffer too small for the whole path: keep only its trailing part. */
1791 if (filename_len > (size_t)remaining_space) {
1792 filename += filename_len - remaining_space;
1793 filename_len = remaining_space;
/* Open a gap at the recorded position and copy the filename into it. */
1796 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1797 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1798 memmove(pos + filename_len, pos, to_move);
1799 memcpy(pos, filename, filename_len);
1801 ttrace->filename.ptr = 0;
1802 ttrace->filename.entry_str_pos = 0;
1804 thread__put(thread);
/*
 * sched:sched_stat_runtime handler: accumulate CPU runtime (in ms) per
 * thread and globally, for the summary.  On failure to get the
 * thread_trace, dumps the raw event fields instead.
 */
1809 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1810 union perf_event *event __maybe_unused,
1811 struct perf_sample *sample)
1813 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1814 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1815 struct thread *thread = machine__findnew_thread(trace->host,
1818 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1823 ttrace->runtime_ms += runtime_ms;
1824 trace->runtime_ms += runtime_ms;
1826 thread__put(thread);
/* Fallback path: could not attach thread_trace, just dump the payload. */
1830 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1832 perf_evsel__strval(evsel, sample, "comm"),
1833 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1835 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * print_binary() callback for BPF output events: emit printable bytes
 * as-is and everything else as '.'; all other print_binary ops are
 * ignored (no hex/addr columns).
 */
1839 static void bpf_output__printer(enum binary_printer_ops op,
1840 unsigned int val, void *extra)
1842 FILE *output = extra;
1843 unsigned char ch = (unsigned char)val;
1846 case BINARY_PRINT_CHAR_DATA:
1847 fprintf(output, "%c", isprint(ch) ? ch : '.');
1849 case BINARY_PRINT_DATA_BEGIN:
1850 case BINARY_PRINT_LINE_BEGIN:
1851 case BINARY_PRINT_ADDR:
1852 case BINARY_PRINT_NUM_DATA:
1853 case BINARY_PRINT_NUM_PAD:
1854 case BINARY_PRINT_SEP:
1855 case BINARY_PRINT_CHAR_PAD:
1856 case BINARY_PRINT_LINE_END:
1857 case BINARY_PRINT_DATA_END:
/* Dump a BPF output event's raw payload as printable text, 8 bytes per line. */
1863 static void bpf_output__fprintf(struct trace *trace,
1864 struct perf_sample *sample)
1866 print_binary(sample->raw_data, sample->raw_size, 8,
1867 bpf_output__printer, trace->output);
/*
 * Generic handler for events added with --event (non-syscall
 * tracepoints and BPF output): print a timestamped line with the
 * event's decoded payload, plus the callchain when requested.
 */
1870 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1871 union perf_event *event __maybe_unused,
1872 struct perf_sample *sample)
1874 int callchain_ret = 0;
1876 if (sample->callchain) {
1877 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1878 if (callchain_ret == 0) {
1879 if (callchain_cursor.nr < trace->min_stack)
1885 trace__printf_interrupted_entry(trace, sample);
1886 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* When also tracing syscalls, pad so columns line up with syscall lines. */
1888 if (trace->trace_syscalls)
1889 fprintf(trace->output, "( ): ");
1891 fprintf(trace->output, "%s:", evsel->name);
1893 if (perf_evsel__is_bpf_output(evsel)) {
1894 bpf_output__fprintf(trace, sample);
1895 } else if (evsel->tp_format) {
1896 event_format__fprintf(evsel->tp_format, sample->cpu,
1897 sample->raw_data, sample->raw_size,
1901 fprintf(trace->output, ")\n");
1903 if (callchain_ret > 0)
1904 trace__fprintf_callchain(trace, sample);
1905 else if (callchain_ret < 0)
1906 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
/*
 * Print a resolved address location for page-fault lines: optionally
 * the DSO name, then symbol+offset when resolved, otherwise the raw
 * address (falling back to sample->addr when even that is missing).
 */
1911 static void print_location(FILE *f, struct perf_sample *sample,
1912 struct addr_location *al,
1913 bool print_dso, bool print_sym)
1916 if ((verbose > 0 || print_dso) && al->map)
1917 fprintf(f, "%s@", al->map->dso->long_name);
1919 if ((verbose > 0 || print_sym) && al->sym)
1920 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1921 al->addr - al->sym->start);
1923 fprintf(f, "0x%" PRIx64, al->addr);
1925 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page-fault software event handler: print "majfault"/"minfault" with
 * the faulting IP location and the faulted address (resolved first as
 * data, then as code), plus the callchain when requested.
 */
1928 static int trace__pgfault(struct trace *trace,
1929 struct perf_evsel *evsel,
1930 union perf_event *event __maybe_unused,
1931 struct perf_sample *sample)
1933 struct thread *thread;
1934 struct addr_location al;
/* 'd' = data map; switched below if the address resolves as code. */
1935 char map_type = 'd';
1936 struct thread_trace *ttrace;
1938 int callchain_ret = 0;
1940 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1942 if (sample->callchain) {
1943 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1944 if (callchain_ret == 0) {
1945 if (callchain_cursor.nr < trace->min_stack)
1951 ttrace = thread__trace(thread, trace->output);
/* Count major vs minor faults separately for the summary. */
1955 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1960 if (trace->summary_only)
/* Resolve the instruction that faulted (sample->ip). */
1963 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1966 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1968 fprintf(trace->output, "%sfault [",
1969 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1972 print_location(trace->output, sample, &al, false, true);
1974 fprintf(trace->output, "] => ");
/* Resolve the faulted address: try the data maps first... */
1976 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
/* ...then fall back to the code maps (e.g. fault on an executable page). */
1980 thread__find_addr_location(thread, sample->cpumode,
1981 MAP__FUNCTION, sample->addr, &al);
1989 print_location(trace->output, sample, &al, true, false);
1991 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1993 if (callchain_ret > 0)
1994 trace__fprintf_callchain(trace, sample);
1995 else if (callchain_ret < 0)
1996 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2000 thread__put(thread);
/*
 * Record the first usable sample timestamp as the base for relative
 * timestamps, unless --time (full_time) was requested.
 */
2004 static void trace__set_base_time(struct trace *trace,
2005 struct perf_evsel *evsel,
2006 struct perf_sample *sample)
2009 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2010 * and don't use sample->time unconditionally, we may end up having
2011 * some other event in the future without PERF_SAMPLE_TIME for good
2012 * reason, i.e. we may not be interested in its timestamps, just in
2013 * it taking place, picking some piece of information when it
2014 * appears in our event stream (vfs_getname comes to mind).
2016 if (trace->base_time == 0 && !trace->full_time &&
2017 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2018 trace->base_time = sample->time;
/*
 * perf_tool sample callback used in replay mode (perf.data files):
 * dispatch the sample to the handler installed on its evsel, skipping
 * threads excluded by the user's filters.
 */
2021 static int trace__process_sample(struct perf_tool *tool,
2022 union perf_event *event,
2023 struct perf_sample *sample,
2024 struct perf_evsel *evsel,
2025 struct machine *machine __maybe_unused)
2027 struct trace *trace = container_of(tool, struct trace, tool);
2028 struct thread *thread;
2031 tracepoint_handler handler = evsel->handler;
2033 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2034 if (thread && thread__is_filtered(thread))
2037 trace__set_base_time(trace, evsel, sample);
2041 handler(trace, evsel, event, sample);
2044 thread__put(thread);
/*
 * 'perf trace record': build an argv for 'perf record' that captures
 * the raw_syscalls (or legacy syscalls) tracepoints and any requested
 * page-fault events, then hand off to cmd_record().
 */
2048 static int trace__record(struct trace *trace, int argc, const char **argv)
2050 unsigned int rec_argc, i, j;
2051 const char **rec_argv;
2052 const char * const record_args[] = {
2059 const char * const sc_args[] = { "-e", };
2060 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2061 const char * const majpf_args[] = { "-e", "major-faults" };
2062 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2063 const char * const minpf_args[] = { "-e", "minor-faults" };
2064 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2066 /* +1 is for the event string below */
2067 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2068 majpf_args_nr + minpf_args_nr + argc;
2069 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2071 if (rec_argv == NULL)
2075 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2076 rec_argv[j++] = record_args[i];
2078 if (trace->trace_syscalls) {
2079 for (i = 0; i < sc_args_nr; i++)
2080 rec_argv[j++] = sc_args[i];
2082 /* event string may be different for older kernels - e.g., RHEL6 */
2083 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2084 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2085 else if (is_valid_tracepoint("syscalls:sys_enter"))
2086 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2088 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2094 if (trace->trace_pgfaults & TRACE_PFMAJ)
2095 for (i = 0; i < majpf_args_nr; i++)
2096 rec_argv[j++] = majpf_args[i];
2098 if (trace->trace_pgfaults & TRACE_PFMIN)
2099 for (i = 0; i < minpf_args_nr; i++)
2100 rec_argv[j++] = minpf_args[i];
/* Append the user's own command-line (workload and extra options). */
2102 for (i = 0; i < (unsigned int)argc; i++)
2103 rec_argv[j++] = argv[i];
2105 return cmd_record(j, rec_argv);
2108 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname* probe event(s) so pathnames can be
 * captured for open-like syscalls.  Variants lacking the "pathname"
 * field are removed from the evlist again.  Returns true when at least
 * one usable variant was installed (best-effort: absence is not fatal).
 */
2110 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2113 struct perf_evsel *evsel, *tmp;
2114 struct parse_events_error err = { .idx = 0, };
2115 int ret = parse_events(evlist, "probe:vfs_getname*", &err);
2120 evlist__for_each_entry_safe(evlist, evsel, tmp) {
2121 if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
2124 if (perf_evsel__field(evsel, "pathname")) {
2125 evsel->handler = trace__vfs_getname;
/* No "pathname" field: this variant is unusable, drop it from the list. */
2130 list_del_init(&evsel->node);
2131 evsel->evlist = NULL;
2132 perf_evsel__delete(evsel);
/*
 * Create a software page-fault evsel (config selects major vs minor),
 * sampling every fault, with trace__pgfault as its handler.
 */
2138 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2140 struct perf_evsel *evsel;
2141 struct perf_event_attr attr = {
2142 .type = PERF_TYPE_SOFTWARE,
2146 attr.config = config;
/* Sample every single fault, not a frequency-based subset. */
2147 attr.sample_period = 1;
2149 event_attr_init(&attr);
2151 evsel = perf_evsel__new(&attr);
2153 evsel->handler = trace__pgfault;
/*
 * Live-mode event dispatcher: route non-sample records to the generic
 * machinery, map samples back to their evsel by id, and invoke the
 * evsel's handler — after sanity-checking that tracepoint samples
 * actually carry a payload.
 */
2158 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2160 const u32 type = event->header.type;
2161 struct perf_evsel *evsel;
2163 if (type != PERF_RECORD_SAMPLE) {
2164 trace__process_event(trace, trace->host, event, sample);
2168 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2169 if (evsel == NULL) {
2170 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2174 trace__set_base_time(trace, evsel, sample);
/* Defensive: a tracepoint sample without raw_data can't be decoded. */
2176 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2177 sample->raw_data == NULL) {
2178 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2179 perf_evsel__name(evsel), sample->tid,
2180 sample->cpu, sample->raw_size);
2182 tracepoint_handler handler = evsel->handler;
2183 handler(trace, evsel, event, sample);
/*
 * Create the raw_syscalls:sys_enter / sys_exit tracepoint evsels, wire
 * up their handlers and required fields, and add them to the evlist.
 * Returns 0 on success; on failure the partially created evsels are
 * deleted via the goto cleanup chain.
 */
2187 static int trace__add_syscall_newtp(struct trace *trace)
2190 struct perf_evlist *evlist = trace->evlist;
2191 struct perf_evsel *sys_enter, *sys_exit;
2193 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2194 if (sys_enter == NULL)
2197 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2198 goto out_delete_sys_enter;
2200 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2201 if (sys_exit == NULL)
2202 goto out_delete_sys_enter;
2204 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2205 goto out_delete_sys_exit;
2207 perf_evlist__add(evlist, sys_enter);
2208 perf_evlist__add(evlist, sys_exit);
2210 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2212 * We're interested only in the user space callchain
2213 * leading to the syscall, allow overriding that for
2214 * debugging reasons using --kernel_syscall_callchains
2216 sys_exit->attr.exclude_callchain_kernel = 1;
2219 trace->syscalls.events.sys_enter = sys_enter;
2220 trace->syscalls.events.sys_exit = sys_exit;
2226 out_delete_sys_exit:
2227 perf_evsel__delete_priv(sys_exit);
2228 out_delete_sys_enter:
2229 perf_evsel__delete_priv(sys_enter);
/*
 * Build an "id == x || id == y ..." (or negated, with -e !...) filter
 * expression from the validated qualifier ids and append it to both
 * syscall tracepoints so the kernel filters unwanted syscalls for us.
 */
2233 static int trace__set_ev_qualifier_filter(struct trace *trace)
2236 struct perf_evsel *sys_exit;
2237 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2238 trace->ev_qualifier_ids.nr,
2239 trace->ev_qualifier_ids.entries);
2244 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2246 sys_exit = trace->syscalls.events.sys_exit;
2247 err = perf_evsel__append_tp_filter(sys_exit, filter);
/*
 * Avoid a feedback loop in system-wide mode: filter out perf trace's
 * own pid chain.  Walks up the parent chain collecting pids while the
 * parent is an sshd (so tracing output sent over ssh doesn't generate
 * events that generate more output, ad infinitum).
 */
2258 static int trace__set_filter_loop_pids(struct trace *trace)
2260 unsigned int nr = 1;
2264 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2266 while (thread && nr < ARRAY_SIZE(pids)) {
2267 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2272 if (!strcmp(thread__comm_str(parent), "sshd")) {
2273 pids[nr++] = parent->tid;
2279 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
/*
 * Live tracing main loop: set up the evlist (syscall tracepoints,
 * vfs_getname, page faults, sched_stat_runtime), configure callchains
 * and filters, open/mmap the events, optionally fork and start the
 * workload, then poll the ring buffers dispatching events until done.
 * Ends by printing the summary and tool stats when requested.
 * Error paths funnel through labels that format an explanatory message.
 */
2282 static int trace__run(struct trace *trace, int argc, const char **argv)
2284 struct perf_evlist *evlist = trace->evlist;
2285 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2287 unsigned long before;
2288 const bool forks = argc > 0;
2289 bool draining = false;
2293 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2294 goto out_error_raw_syscalls;
/* vfs_getname is best-effort: its absence only disables pathname decoding. */
2296 if (trace->trace_syscalls)
2297 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2299 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2300 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2301 if (pgfault_maj == NULL)
2303 perf_evlist__add(evlist, pgfault_maj);
2306 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2307 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2308 if (pgfault_min == NULL)
2310 perf_evlist__add(evlist, pgfault_min);
2314 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2315 trace__sched_stat_runtime))
2316 goto out_error_sched_stat_runtime;
2318 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2320 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2321 goto out_delete_evlist;
2324 err = trace__symbols_init(trace, evlist);
2326 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2327 goto out_delete_evlist;
2330 perf_evlist__config(evlist, &trace->opts, NULL);
/* Enable callchains only on the evsels the user cares about. */
2332 if (callchain_param.enabled) {
2333 bool use_identifier = false;
2335 if (trace->syscalls.events.sys_exit) {
2336 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2337 &trace->opts, &callchain_param);
2338 use_identifier = true;
2342 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2343 use_identifier = true;
2347 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2348 use_identifier = true;
2351 if (use_identifier) {
2353 * Now we have evsels with different sample_ids, use
2354 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2355 * from a fixed position in each ring buffer record.
2357 * As of this the changeset introducing this comment, this
2358 * isn't strictly needed, as the fields that can come before
2359 * PERF_SAMPLE_ID are all used, but we'll probably disable
2360 * some of those for things like copying the payload of
2361 * pointer syscall arguments, and for vfs_getname we don't
2362 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2363 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2365 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2366 perf_evlist__reset_sample_bit(evlist, ID);
2370 signal(SIGCHLD, sig_handler);
2371 signal(SIGINT, sig_handler);
/* Fork (but don't exec yet) the workload given on the command line. */
2374 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2377 fprintf(trace->output, "Couldn't run the workload!\n");
2378 goto out_delete_evlist;
2382 err = perf_evlist__open(evlist);
2384 goto out_error_open;
2386 err = bpf__apply_obj_config();
2388 char errbuf[BUFSIZ];
2390 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2391 pr_err("ERROR: Apply config to BPF failed: %s\n",
2393 goto out_error_open;
2397 * Better not use !target__has_task() here because we need to cover the
2398 * case where no threads were specified in the command line, but a
2399 * workload was, and in that case we will fill in the thread_map when
2400 * we fork the workload in perf_evlist__prepare_workload.
2402 if (trace->filter_pids.nr > 0)
2403 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2404 else if (thread_map__pid(evlist->threads, 0) == -1)
2405 err = trace__set_filter_loop_pids(trace);
2410 if (trace->ev_qualifier_ids.nr > 0) {
2411 err = trace__set_ev_qualifier_filter(trace);
2415 pr_debug("event qualifier tracepoint filter: %s\n",
2416 trace->syscalls.events.sys_exit->filter);
2419 err = perf_evlist__apply_filters(evlist, &evsel);
2421 goto out_error_apply_filters;
2423 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2425 goto out_error_mmap;
2427 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2428 perf_evlist__enable(evlist);
2431 perf_evlist__start_workload(evlist);
/* --delay: let the workload warm up before enabling the events. */
2433 if (trace->opts.initial_delay) {
2434 usleep(trace->opts.initial_delay * 1000);
2435 perf_evlist__enable(evlist);
/* More than one thread (or inherit) means tids must appear in the output. */
2438 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2439 evlist->threads->nr > 1 ||
2440 perf_evlist__first(evlist)->attr.inherit;
/* Main event loop: drain every mmap'ed ring buffer, then poll. */
2442 before = trace->nr_events;
2444 for (i = 0; i < evlist->nr_mmaps; i++) {
2445 union perf_event *event;
2447 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2448 struct perf_sample sample;
2452 err = perf_evlist__parse_sample(evlist, event, &sample);
2454 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2458 trace__handle_event(trace, event, &sample);
2460 perf_evlist__mmap_consume(evlist, i);
/* On SIGINT/SIGCHLD: disable events and keep draining what's buffered. */
2465 if (done && !draining) {
2466 perf_evlist__disable(evlist);
/* No new events this pass: block in poll (bounded to 100ms when finishing). */
2472 if (trace->nr_events == before) {
2473 int timeout = done ? 100 : -1;
2475 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2476 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2486 thread__zput(trace->current);
2488 perf_evlist__disable(evlist);
2492 trace__fprintf_thread_summary(trace, trace->output);
2494 if (trace->show_tool_stats) {
2495 fprintf(trace->output, "Stats:\n "
2496 " vfs_getname : %" PRIu64 "\n"
2497 " proc_getname: %" PRIu64 "\n",
2498 trace->stats.vfs_getname,
2499 trace->stats.proc_getname);
2504 trace__symbols__exit(trace);
2506 perf_evlist__delete(evlist);
2507 trace->evlist = NULL;
2508 trace->live = false;
/* Error reporting paths: format errno into a message, then clean up. */
2511 char errbuf[BUFSIZ];
2513 out_error_sched_stat_runtime:
2514 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2517 out_error_raw_syscalls:
2518 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2522 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2526 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2529 fprintf(trace->output, "%s\n", errbuf);
2530 goto out_delete_evlist;
2532 out_error_apply_filters:
2533 fprintf(trace->output,
2534 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2535 evsel->filter, perf_evsel__name(evsel), errno,
2536 str_error_r(errno, errbuf, sizeof(errbuf)));
2537 goto out_delete_evlist;
2540 fprintf(trace->output, "Not enough memory to run!\n");
2541 goto out_delete_evlist;
2544 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2545 goto out_delete_evlist;
/*
 * trace__replay - process a previously recorded perf.data file instead of
 * tracing live.  Wires the generic perf event callbacks into trace->tool,
 * resolves the syscall enter/exit tracepoints (raw_syscalls:* on current
 * kernels, syscalls:* on older ones), attaches a page-fault handler to any
 * recorded software fault events, and replays the whole session.
 * NOTE(review): this extract is missing some lines (error paths/returns) —
 * verify against the full file before relying on the exact control flow.
 */
2548 static int trace__replay(struct trace *trace)
2550 const struct perf_evsel_str_handler handlers[] = {
2551 { "probe:vfs_getname", trace__vfs_getname, },
2553 struct perf_data_file file = {
2555 .mode = PERF_DATA_MODE_READ,
2556 .force = trace->force,
2558 struct perf_session *session;
2559 struct perf_evsel *evsel;
/* Route each event kind to the stock perf processing helpers. */
2562 trace->tool.sample = trace__process_sample;
2563 trace->tool.mmap = perf_event__process_mmap;
2564 trace->tool.mmap2 = perf_event__process_mmap2;
2565 trace->tool.comm = perf_event__process_comm;
2566 trace->tool.exit = perf_event__process_exit;
2567 trace->tool.fork = perf_event__process_fork;
2568 trace->tool.attr = perf_event__process_attr;
2569 trace->tool.tracing_data = perf_event__process_tracing_data;
2570 trace->tool.build_id = perf_event__process_build_id;
2571 trace->tool.namespaces = perf_event__process_namespaces;
/* Replay must be time-ordered so enter/exit pairs line up. */
2573 trace->tool.ordered_events = true;
2574 trace->tool.ordering_requires_timestamps = true;
2576 /* add tid to output */
2577 trace->multiple_threads = true;
2579 session = perf_session__new(&file, false, &trace->tool);
2580 if (session == NULL)
/* Restrict symbol resolution to the recorded pids/tids, when given. */
2583 if (trace->opts.target.pid)
2584 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2586 if (trace->opts.target.tid)
2587 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2589 if (symbol__init(&session->header.env) < 0)
2592 trace->host = &session->machines.host;
2594 err = perf_session__set_tracepoints_handlers(session, handlers);
2598 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2599 "raw_syscalls:sys_enter");
2600 /* older kernels have syscalls tp versus raw_syscalls */
2602 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2603 "syscalls:sys_enter");
2606 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2607 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2608 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Same dance for the exit tracepoint, reading its 'ret' field. */
2612 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2613 "raw_syscalls:sys_exit");
2615 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2616 "syscalls:sys_exit");
2618 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2619 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2620 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Any recorded page-fault software event gets the pgfault pretty-printer. */
2624 evlist__for_each_entry(session->evlist, evsel) {
2625 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2626 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2627 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2628 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2629 evsel->handler = trace__pgfault;
2634 err = perf_session__process_events(session);
2636 pr_err("Failed to process events, error %d", err);
2638 else if (trace->summary)
2639 trace__fprintf_thread_summary(trace, trace->output);
2642 perf_session__delete(session);
/* Emit the banner above the per-thread summary; returns bytes written. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Re-sorted rb-tree of per-syscall stats, ordered by total time:
 * msecs = number of calls * average duration (converted from ns to ms).
 * Each source node is an int_node keyed by syscall nr with a 'struct stats'
 * hanging off ->priv.
 */
2656 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2657 struct stats *stats;
2662 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2663 struct stats *stats = source->priv;
2665 entry->syscall = source->i;
2666 entry->stats = stats;
2667 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
/*
 * thread__dump_stats - print the per-syscall statistics table for one thread,
 * sorted by total elapsed time (see the syscall_stats resort above).
 * Returns the number of bytes printed.
 */
2670 static size_t thread__dump_stats(struct thread_trace *ttrace,
2671 struct trace *trace, FILE *fp)
2676 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2678 if (syscall_stats == NULL)
2681 printed += fprintf(fp, "\n");
/* Column header for the stats table; all times are in milliseconds. */
2683 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2684 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2685 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2687 resort_rb__for_each_entry(nd, syscall_stats) {
2688 struct stats *stats = syscall_stats_entry->stats;
/* min/max/avg are accumulated in ns; scale to msec for display. */
2690 double min = (double)(stats->min) / NSEC_PER_MSEC;
2691 double max = (double)(stats->max) / NSEC_PER_MSEC;
2692 double avg = avg_stats(stats);
2694 u64 n = (u64) stats->n;
/* Relative stddev as a percentage of the mean; 0 when mean is 0. */
2696 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2697 avg /= NSEC_PER_MSEC;
/* Map the resorted entry's syscall nr back to its table slot for the name. */
2699 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2700 printed += fprintf(fp, " %-15s", sc->name);
2701 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2702 n, syscall_stats_entry->msecs, min, avg);
2703 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2707 resort_rb__delete(syscall_stats);
2708 printed += fprintf(fp, "\n\n");
/*
 * trace__fprintf_thread - print one thread's summary line (comm, tid, event
 * count and its share of all events, fault counts, runtime) followed by its
 * per-syscall statistics table.  Returns bytes printed.
 */
2713 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2716 struct thread_trace *ttrace = thread__priv(thread);
/* This thread's share of every event seen during the run. */
2722 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2724 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2725 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2726 printed += fprintf(fp, "%.1f%%", ratio);
2728 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2730 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2732 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2733 else if (fputc('\n', fp) != EOF)
2736 printed += thread__dump_stats(ttrace, trace, fp);
2741 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2743 return ttrace ? ttrace->nr_events : 0;
/*
 * Re-sorted rb-tree of threads ordered by descending event count, so the
 * summary lists the busiest threads first.  thread->priv holds the
 * thread_trace state, which may be NULL (thread__nr_events handles that).
 */
2746 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2747 struct thread *thread;
2750 entry->thread = rb_entry(nd, struct thread, rb_node);
/*
 * trace__fprintf_thread_summary - print the header then one summary block per
 * thread on the traced host, busiest threads first (threads resort above).
 * Returns bytes printed.
 */
2753 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2755 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2756 size_t printed = trace__fprintf_threads_header(fp);
/* Resort allocation failed: report and bail without per-thread output. */
2759 if (threads == NULL) {
2760 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2764 resort_rb__for_each_entry(nd, threads)
2765 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2767 resort_rb__delete(threads);
2772 static int trace__set_duration(const struct option *opt, const char *str,
2773 int unset __maybe_unused)
2775 struct trace *trace = opt->value;
2777 trace->duration_filter = atof(str);
/*
 * trace__set_filter_pids - parse the --filter-pids CSV list into
 * trace->filter_pids.  Slot 0 is always our own pid so the tracer does not
 * trace itself; the parsed pids follow.
 * NOTE(review): error-path lines are missing from this extract — confirm the
 * NULL-list and cleanup handling against the full file.
 */
2781 static int trace__set_filter_pids(const struct option *opt, const char *str,
2782 int unset __maybe_unused)
2786 struct trace *trace = opt->value;
2788 * FIXME: introduce a intarray class, plain parse csv and create a
2789 * { int nr, int entries[] } struct...
2791 struct intlist *list = intlist__new(str);
/* +1 reserves the extra slot for getpid() below. */
2796 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2797 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2799 if (trace->filter_pids.entries == NULL)
2802 trace->filter_pids.entries[0] = getpid();
2804 for (i = 1; i < trace->filter_pids.nr; ++i)
2805 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
/* The intlist was only needed as a parse scratchpad. */
2807 intlist__delete(list);
2813 static int trace__open_output(struct trace *trace, const char *filename)
2817 if (!stat(filename, &st) && st.st_size) {
2818 char oldname[PATH_MAX];
2820 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2822 rename(filename, oldname);
2825 trace->output = fopen(filename, "w");
2827 return trace->output == NULL ? -errno : 0;
2830 static int parse_pagefaults(const struct option *opt, const char *str,
2831 int unset __maybe_unused)
2833 int *trace_pgfaults = opt->value;
2835 if (strcmp(str, "all") == 0)
2836 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2837 else if (strcmp(str, "maj") == 0)
2838 *trace_pgfaults |= TRACE_PFMAJ;
2839 else if (strcmp(str, "min") == 0)
2840 *trace_pgfaults |= TRACE_PFMIN;
2847 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2849 struct perf_evsel *evsel;
2851 evlist__for_each_entry(evlist, evsel)
2852 evsel->handler = handler;
2856 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2857 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2858 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2860 * It'd be better to introduce a parse_options() variant that would return a
2861 * list with the terms it didn't match to an event...
/*
 * trace__parse_events_option - split the combined -e argument into two comma
 * separated lists: lists[1] collects syscall names/globs and strace-group
 * files (fed to the ev_qualifier strlist), lists[0] collects everything else
 * (handed to the regular parse_events_option()).
 * NOTE(review): the per-token loop structure and error labels are missing
 * from this extract — confirm control flow against the full file.
 */
2863 static int trace__parse_events_option(const struct option *opt, const char *str,
2864 int unset __maybe_unused)
2866 struct trace *trace = (struct trace *)opt->value;
2867 const char *s = str;
2868 char *sep = NULL, *lists[2] = { NULL, NULL, };
2869 int len = strlen(str) + 1, err = -1, list, idx;
2870 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2871 char group_name[PATH_MAX];
2873 if (strace_groups_dir == NULL)
/* A leading '!' negates the whole qualifier list. */
2878 trace->not_ev_qualifier = true;
2882 if ((sep = strchr(s, ',')) != NULL)
/* Token is a syscall (exact or glob match against the syscall table)... */
2886 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2887 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
/* ...or a readable strace-groups file: both go to the qualifier list. */
2890 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2891 if (access(group_name, R_OK) == 0)
2896 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
/* First token for this list: len bounds the whole original string. */
2898 lists[list] = malloc(len);
2899 if (lists[list] == NULL)
2901 strcpy(lists[list], s);
2911 if (lists[1] != NULL) {
2912 struct strlist_config slist_config = {
2913 .dirname = strace_groups_dir,
2916 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2917 if (trace->ev_qualifier == NULL) {
2918 fputs("Not enough memory to parse event qualifier", trace->output);
2922 if (trace__validate_ev_qualifier(trace))
/* Forward the non-syscall remainder through the stock -e parser. */
2929 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2930 "event selector. use 'perf list' to list available events",
2931 parse_events_option);
2932 err = parse_events_option(&o, lists[0], 0);
2941 int cmd_trace(int argc, const char **argv)
2943 const char *trace_usage[] = {
2944 "perf trace [<options>] [<command>]",
2945 "perf trace [<options>] -- <command> [<options>]",
2946 "perf trace record [<options>] [<command>]",
2947 "perf trace record [<options>] -- <command> [<options>]",
2950 struct trace trace = {
2959 .user_freq = UINT_MAX,
2960 .user_interval = ULLONG_MAX,
2961 .no_buffering = true,
2962 .mmap_pages = UINT_MAX,
2963 .proc_map_timeout = 500,
2967 .trace_syscalls = true,
2968 .kernel_syscallchains = false,
2969 .max_stack = UINT_MAX,
2971 const char *output_name = NULL;
2972 const struct option trace_options[] = {
2973 OPT_CALLBACK('e', "event", &trace, "event",
2974 "event/syscall selector. use 'perf list' to list available events",
2975 trace__parse_events_option),
2976 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2977 "show the thread COMM next to its id"),
2978 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2979 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2980 trace__parse_events_option),
2981 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2982 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2983 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2984 "trace events on existing process id"),
2985 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2986 "trace events on existing thread id"),
2987 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2988 "pids to filter (by the kernel)", trace__set_filter_pids),
2989 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2990 "system-wide collection from all CPUs"),
2991 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2992 "list of cpus to monitor"),
2993 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2994 "child tasks do not inherit counters"),
2995 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2996 "number of mmap data pages",
2997 perf_evlist__parse_mmap_pages),
2998 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3000 OPT_CALLBACK(0, "duration", &trace, "float",
3001 "show only events with duration > N.M ms",
3002 trace__set_duration),
3003 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3004 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3005 OPT_BOOLEAN('T', "time", &trace.full_time,
3006 "Show full timestamp, not time relative to first start"),
3007 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3008 "Show only syscall summary with statistics"),
3009 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3010 "Show all syscalls and summary with statistics"),
3011 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3012 "Trace pagefaults", parse_pagefaults, "maj"),
3013 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3014 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3015 OPT_CALLBACK(0, "call-graph", &trace.opts,
3016 "record_mode[,record_size]", record_callchain_help,
3017 &record_parse_callchain_opt),
3018 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3019 "Show the kernel callchains on the syscall exit path"),
3020 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3021 "Set the minimum stack depth when parsing the callchain, "
3022 "anything below the specified depth will be ignored."),
3023 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3024 "Set the maximum stack depth when parsing the callchain, "
3025 "anything beyond the specified depth will be ignored. "
3026 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
3027 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3028 "per thread proc mmap processing timeout in ms"),
3029 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3030 "ms to wait before starting measurement after program "
3034 bool __maybe_unused max_stack_user_set = true;
3035 bool mmap_pages_user_set = true;
3036 const char * const trace_subcommands[] = { "record", NULL };
3040 signal(SIGSEGV, sighandler_dump_stack);
3041 signal(SIGFPE, sighandler_dump_stack);
3043 trace.evlist = perf_evlist__new();
3044 trace.sctbl = syscalltbl__new();
3046 if (trace.evlist == NULL || trace.sctbl == NULL) {
3047 pr_err("Not enough memory to run!\n");
3052 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3053 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3055 err = bpf__setup_stdout(trace.evlist);
3057 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3058 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3064 if (trace.trace_pgfaults) {
3065 trace.opts.sample_address = true;
3066 trace.opts.sample_time = true;
3069 if (trace.opts.mmap_pages == UINT_MAX)
3070 mmap_pages_user_set = false;
3072 if (trace.max_stack == UINT_MAX) {
3073 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
3074 max_stack_user_set = false;
3077 #ifdef HAVE_DWARF_UNWIND_SUPPORT
3078 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
3079 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3082 if (callchain_param.enabled) {
3083 if (!mmap_pages_user_set && geteuid() == 0)
3084 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3086 symbol_conf.use_callchain = true;
3089 if (trace.evlist->nr_entries > 0)
3090 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3092 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3093 return trace__record(&trace, argc-1, &argv[1]);
3095 /* summary_only implies summary option, but don't overwrite summary if set */
3096 if (trace.summary_only)
3097 trace.summary = trace.summary_only;
3099 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3100 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3101 pr_err("Please specify something to trace.\n");
3105 if (!trace.trace_syscalls && trace.ev_qualifier) {
3106 pr_err("The -e option can't be used with --no-syscalls.\n");
3110 if (output_name != NULL) {
3111 err = trace__open_output(&trace, output_name);
3113 perror("failed to create output file");
3118 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3120 err = target__validate(&trace.opts.target);
3122 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3123 fprintf(trace.output, "%s", bf);
3127 err = target__parse_uid(&trace.opts.target);
3129 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3130 fprintf(trace.output, "%s", bf);
3134 if (!argc && target__none(&trace.opts.target))
3135 trace.opts.target.system_wide = true;
3138 err = trace__replay(&trace);
3140 err = trace__run(&trace, argc, argv);
3143 if (output_name != NULL)
3144 fclose(trace.output);