// SPDX-License-Identifier: GPL-2.0
/*
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"
#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>

struct switch_output {
    bool          enabled;
    bool          signal;
    unsigned long size;
    unsigned long time;
    const char   *str;
    bool          set;
    char        **filenames;
    int           num_files;
    int           cur_file;
};

struct thread_mask {
    struct mmap_cpu_mask maps;
    struct mmap_cpu_mask affinity;
};

struct record_thread {
    pid_t                tid;
    struct thread_mask  *mask;
    struct {
        int msg[2];
        int ack[2];
    } pipes;
    struct fdarray       pollfd;
    int                  ctlfd_pos;
    int                  nr_mmaps;
    struct mmap        **maps;
    struct mmap        **overwrite_maps;
    struct record       *rec;
    unsigned long long   samples;
    unsigned long        waking;
    u64                  bytes_written;
    u64                  bytes_transferred;
    u64                  bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
    THREAD_MSG__UNDEFINED = 0,
    THREAD_MSG__READY,
    THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
    "UNDEFINED", "READY"
};

enum thread_spec {
    THREAD_SPEC__UNDEFINED = 0,
    THREAD_SPEC__CPU,
    THREAD_SPEC__CORE,
    THREAD_SPEC__PACKAGE,
    THREAD_SPEC__NUMA,
    THREAD_SPEC__USER,
    THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
    "undefined", "cpu", "core", "package", "numa", "user"
};

struct record {
    struct perf_tool        tool;
    struct record_opts      opts;
    u64                     bytes_written;
    struct perf_data        data;
    struct auxtrace_record *itr;
    struct evlist          *evlist;
    struct perf_session    *session;
    struct evlist          *sb_evlist;
    pthread_t               thread_id;
    int                     realtime_prio;
    bool                    switch_output_event_set;
    bool                    no_buildid;
    bool                    no_buildid_cache;
    bool                    no_buildid_cache_set;
    bool                    buildid_all;
    bool                    timestamp_filename;
    bool                    timestamp_boundary;
    bool                    off_cpu;
    struct switch_output    switch_output;
    unsigned long long      samples;
    unsigned long           output_max_size;    /* = 0: unlimited */
    struct perf_debuginfod  debuginfod;
    int                     nr_threads;
    struct thread_mask     *thread_masks;
    struct record_thread   *thread_data;
};

static volatile int done;
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
    "SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
    return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
    return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
    return rec->switch_output.signal &&
           trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
    return rec->switch_output.size &&
           trigger_is_ready(&switch_output_trigger) &&
           (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
    return rec->switch_output.time &&
           trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
    int t;
    u64 bytes_written = rec->bytes_written;
    struct record_thread *thread_data = rec->thread_data;

    for (t = 0; t < rec->nr_threads; t++)
        bytes_written += thread_data[t].bytes_written;

    return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
    return rec->output_max_size &&
           (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
                         void *bf, size_t size)
{
    struct perf_data_file *file = &rec->session->data->file;

    if (map && map->file)
        file = map->file;

    if (perf_data_file__write(file, bf, size) < 0) {
        pr_err("failed to write perf data, error: %m\n");
        return -1;
    }

    if (map && map->file)
        thread->bytes_written += size;
    else
        rec->bytes_written += size;

    if (record__output_max_size_exceeded(rec) && !done) {
        fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
                " stopping session ]\n",
                record__bytes_written(rec) >> 10);
        done = 1;
    }

    if (switch_output_size(rec))
        trigger_hit(&switch_output_trigger);

    return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
                            void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
                             void *buf, size_t size, off_t off)
{
    int rc;

    cblock->aio_fildes = trace_fd;
    cblock->aio_buf    = buf;
    cblock->aio_nbytes = size;
    cblock->aio_offset = off;
    cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

    do {
        rc = aio_write(cblock);
        if (rc == 0) {
            break;
        } else if (errno != EAGAIN) {
            cblock->aio_fildes = -1;
            pr_err("failed to queue perf data, error: %m\n");
            break;
        }
    } while (1);

    return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
    void *rem_buf;
    off_t rem_off;
    size_t rem_size;
    int rc, aio_errno;
    ssize_t aio_ret, written;

    aio_errno = aio_error(cblock);
    if (aio_errno == EINPROGRESS)
        return 0;

    written = aio_ret = aio_return(cblock);
    if (aio_ret < 0) {
        if (aio_errno != EINTR)
            pr_err("failed to write perf data, error: %m\n");
        written = 0;
    }

    rem_size = cblock->aio_nbytes - written;

    if (rem_size == 0) {
        cblock->aio_fildes = -1;
        /*
         * md->refcount is incremented in record__aio_pushfn() for
         * every aio write request started in record__aio_push() so
         * decrement it because the request is now complete.
         */
        perf_mmap__put(&md->core);
        rc = 1;
    } else {
        /*
         * The aio write request may require a restart with the
         * remainder if the kernel didn't write the whole
         * chunk at once.
         */
        rem_off = cblock->aio_offset + written;
        rem_buf = (void *)(cblock->aio_buf + written);
        record__aio_write(cblock, cblock->aio_fildes,
                          rem_buf, rem_size, rem_off);
        rc = 0;
    }

    return rc;
}
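/*
 * A worked example for the restart path above (values assumed for
 * illustration, not taken from the source): if aio_nbytes was 4096 and
 * the kernel reported written == 1024, then rem_size == 3072 and the
 * request is requeued with rem_buf == aio_buf + 1024 at file offset
 * aio_offset + 1024, until the whole chunk eventually lands on disk.
 */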
static int record__aio_sync(struct mmap *md, bool sync_all)
{
    struct aiocb **aiocb = md->aio.aiocb;
    struct aiocb *cblocks = md->aio.cblocks;
    struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
    int i, do_suspend;

    do {
        do_suspend = 0;
        for (i = 0; i < md->aio.nr_cblocks; ++i) {
            if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
                if (sync_all)
                    aiocb[i] = NULL;
                else
                    return i;
            } else {
                /*
                 * The started aio write is not complete yet,
                 * so it has to be waited on before the
                 * next allocation.
                 */
                aiocb[i] = &cblocks[i];
                do_suspend = 1;
            }
        }
        if (!do_suspend)
            return -1;

        while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
            if (!(errno == EAGAIN || errno == EINTR))
                pr_err("failed to sync perf data, error: %m\n");
        }
    } while (1);
}

struct record_aio {
    struct record *rec;
    void          *data;
    size_t         size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
    struct record_aio *aio = to;

    /*
     * map->core.base data pointed to by buf is copied into a free
     * map->aio.data[] buffer to release space in the kernel buffer as
     * fast as possible, calling perf_mmap__consume() from the
     * perf_mmap__push() function.
     *
     * That lets the kernel proceed with storing more profiling data into
     * the kernel buffer earlier than other per-cpu kernel buffers are handled.
     *
     * Copying can be done in two steps in case the chunk of profiling data
     * crosses the upper bound of the kernel buffer. In this case we first move
     * part of the data from map->start till the upper bound and then the
     * remainder from the beginning of the kernel buffer till the end of
     * the data chunk.
     */
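    /*
     * An illustration of the two-step copy (numbers assumed): with a
     * 64KiB kernel buffer, a 24KiB chunk that starts 8KiB before the
     * upper bound reaches this callback twice: first the 8KiB tail up
     * to the buffer end, then the remaining 16KiB from the buffer
     * start. aio->size advances between the calls, so both pieces land
     * contiguously in map->aio.data[].
     */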
    if (record__comp_enabled(aio->rec)) {
        size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
                             mmap__mmap_len(map) - aio->size,
                             buf, size);
    } else {
        memcpy(aio->data + aio->size, buf, size);
    }

    if (!aio->size) {
        /*
         * Increment map->refcount to guard the map->aio.data[] buffer
         * from premature deallocation, because the map object can be
         * released before the aio write request started on the
         * map->aio.data[] buffer completes.
         *
         * perf_mmap__put() is done at record__aio_complete()
         * after the started aio request completes, or at record__aio_push()
         * if the request failed to start.
         */
        perf_mmap__get(&map->core);
    }

    aio->size += size;

    return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
    int ret, idx;
    int trace_fd = rec->session->data->file.fd;
    struct record_aio aio = { .rec = rec, .size = 0 };

    /*
     * Call record__aio_sync() to wait till a map->aio.data[] buffer
     * becomes available after the previous aio write operation.
     */
    idx = record__aio_sync(map, false);
    aio.data = map->aio.data[idx];
    ret = perf_mmap__push(map, &aio, record__aio_pushfn);
    if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
        return ret;

    ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
    if (!ret) {
        *off += aio.size;
        rec->bytes_written += aio.size;
        if (switch_output_size(rec))
            trigger_hit(&switch_output_trigger);
    } else {
        /*
         * Decrement map->refcount, incremented in record__aio_pushfn(),
         * if the record__aio_write() operation failed to start; otherwise
         * map->refcount is decremented in record__aio_complete() after
         * the aio write operation finishes successfully.
         */
        perf_mmap__put(&map->core);
    }

    return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
    return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
    lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
    int i;
    struct evlist *evlist = rec->evlist;
    struct mmap *maps = evlist->mmap;

    if (!record__aio_enabled(rec))
        return;

    for (i = 0; i < evlist->core.nr_mmaps; i++) {
        struct mmap *map = &maps[i];

        if (map->core.base)
            record__aio_sync(map, true);
    }
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
                             const char *str,
                             int unset)
{
    struct record_opts *opts = (struct record_opts *)opt->value;

    if (unset) {
        opts->nr_cblocks = 0;
    } else {
        if (str)
            opts->nr_cblocks = strtol(str, NULL, 0);
        if (!opts->nr_cblocks)
            opts->nr_cblocks = nr_cblocks_default;
    }

    return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
                            off_t *off __maybe_unused)
{
    return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
    return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
    return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
                                    const char *str,
                                    int unset)
{
    int flush_max;
    struct record_opts *opts = (struct record_opts *)opt->value;
    static struct parse_tag tags[] = {
        { .tag = 'B', .mult = 1       },
        { .tag = 'K', .mult = 1 << 10 },
        { .tag = 'M', .mult = 1 << 20 },
        { .tag = 'G', .mult = 1 << 30 },
        { .tag = 0 },
    };

    if (unset)
        return 0;

    if (str) {
        opts->mmap_flush = parse_tag_value(str, tags);
        if (opts->mmap_flush == (int)-1)
            opts->mmap_flush = strtol(str, NULL, 0);
    }

    if (!opts->mmap_flush)
        opts->mmap_flush = MMAP_FLUSH_DEFAULT;

    flush_max = evlist__mmap_size(opts->mmap_pages);
    flush_max /= 4;
    if (opts->mmap_flush > flush_max)
        opts->mmap_flush = flush_max;

    return 0;
}
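/*
 * Example (hypothetical option values): '--mmap-flush=2K' is parsed via
 * the 'K' tag above into 2048 bytes, while a plain '--mmap-flush=4096'
 * falls through to strtol(). The result is then clamped to a quarter of
 * the mmap'ed buffer size, so the flush threshold can never exceed what
 * one ring buffer can hold.
 */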
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
    struct record_opts *opts = opt->value;

    if (unset) {
        opts->comp_level = 0;
    } else {
        if (str)
            opts->comp_level = strtol(str, NULL, 0);
        if (!opts->comp_level)
            opts->comp_level = comp_level_default;
    }

    return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
    return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
    struct record *rec = container_of(tool, struct record, tool);
    return record__write(rec, NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
                                            union perf_event *event,
                                            struct perf_sample *sample __maybe_unused,
                                            struct machine *machine __maybe_unused)
{
    static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
    int ret;

    pthread_mutex_lock(&synth_lock);
    ret = process_synthesized_event(tool, event, sample, machine);
    pthread_mutex_unlock(&synth_lock);
    return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
    struct record *rec = to;

    if (record__comp_enabled(rec)) {
        size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
        bf = map->data;
    }

    thread->samples++;
    return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
    if (sig == SIGCHLD)
        child_finished = 1;
    else
        signr = sig;

    done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
    u64 tmp = 1;
    /*
     * It is possible for this signal handler to run after done is checked
     * in the main loop, but before the perf counter fds are polled. If this
     * happens, the poll() will continue to wait even though done is set,
     * and will only break out if either another signal is received, or the
     * counters are ready for read. To ensure the poll() doesn't sleep when
     * done is set, use an eventfd (done_fd) to wake up the poll().
     */
    if (write(done_fd, &tmp, sizeof(tmp)) < 0)
        pr_err("failed to signal wakeup fd, error: %m\n");
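    /*
     * Note on the mechanism (standard eventfd(2) behavior, not specific
     * to this file): the u64 written above is added to the eventfd
     * counter, making done_fd readable; that wakes any poll() currently
     * sleeping on it and closes the race described in the comment.
     */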
}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
    perf_hooks__recover();
    sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
    if (signr == -1)
        return;

    signal(signr, SIG_DFL);
    raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    struct mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
    struct record *rec = container_of(tool, struct record, tool);
    struct perf_data *data = &rec->data;
    size_t padding;
    u8 pad[8] = {0};

    if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
        off_t file_offset;
        int fd = perf_data__fd(data);
        int err;

        file_offset = lseek(fd, 0, SEEK_CUR);
        if (file_offset == -1)
            return -1;
        err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                             event, file_offset);
        if (err)
            return err;
    }

    /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
    padding = (len1 + len2) & 7;
    if (padding)
        padding = 8 - padding;
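    /*
     * Worked example (assumed sizes): len1 + len2 == 13 gives
     * (13 & 7) == 5, so padding == 8 - 5 == 3 and three zero bytes from
     * pad[] are appended below, keeping the AUX data 8-byte aligned as
     * event.auxtrace.size expects.
     */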
    record__write(rec, map, event, event->header.size);
    record__write(rec, map, data1, len1);
    if (len2)
        record__write(rec, map, data2, len2);
    record__write(rec, map, &pad, padding);

    return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct mmap *map)
{
    int ret;

    ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
                              record__process_auxtrace);
    if (ret < 0)
        return ret;

    if (ret)
        rec->samples++;

    return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct mmap *map)
{
    int ret;

    ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
                                       record__process_auxtrace,
                                       rec->opts.auxtrace_snapshot_size);
    if (ret < 0)
        return ret;

    if (ret)
        rec->samples++;

    return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
    int i;
    int rc = 0;

    for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
        struct mmap *map = &rec->evlist->mmap[i];

        if (!map->auxtrace_mmap.base)
            continue;

        if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
            rc = -1;
            goto out;
        }
    }
out:
    return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
    pr_debug("Recording AUX area tracing snapshot\n");
    if (record__auxtrace_read_snapshot_all(rec) < 0) {
        trigger_error(&auxtrace_snapshot_trigger);
    } else {
        if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
            trigger_error(&auxtrace_snapshot_trigger);
        else
            trigger_ready(&auxtrace_snapshot_trigger);
    }
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
    if (trigger_is_error(&auxtrace_snapshot_trigger))
        return 0;

    if (!auxtrace_record__snapshot_started &&
        auxtrace_record__snapshot_start(rec->itr))
        return -1;

    record__read_auxtrace_snapshot(rec, true);
    if (trigger_is_error(&auxtrace_snapshot_trigger))
        return -1;

    return 0;
}

static int record__auxtrace_init(struct record *rec)
{
    int err;

    if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
        && record__threads_enabled(rec)) {
        pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
        return -EINVAL;
    }

    if (!rec->itr) {
        rec->itr = auxtrace_record__init(rec->evlist, &err);
        if (err)
            return err;
    }

    err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                          rec->opts.auxtrace_snapshot_opts);
    if (err)
        return err;

    err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
                                        rec->opts.auxtrace_sample_opts);
    if (err)
        return err;

    auxtrace_regroup_aux_output(rec->evlist);

    return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct mmap *map __maybe_unused)
{
    return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
                                    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
    return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
    return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
    return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
    struct evsel *evsel;

    /* Nothing to do if text poke is already configured */
    evlist__for_each_entry(evlist, evsel) {
        if (evsel->core.attr.text_poke)
            return 0;
    }

    evsel = evlist__add_dummy_on_all_cpus(evlist);
    if (!evsel)
        return -ENOMEM;

    evsel->core.attr.text_poke = 1;
    evsel->core.attr.ksymbol = 1;
    evsel->immediate = true;
    evsel__set_sample_bit(evsel, TIME);

    return 0;
}

static int record__config_off_cpu(struct record *rec)
{
    return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__kcore_readable(struct machine *machine)
{
    char kcore[PATH_MAX];
    int fd;

    scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

    fd = open(kcore, O_RDONLY);
    if (fd < 0)
        return false;

    close(fd);

    return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
    char from_dir[PATH_MAX];
    char kcore_dir[PATH_MAX];
    int ret;

    snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

    ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
    if (ret)
        return ret;

    return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
    thread_data->pipes.msg[0] = -1;
    thread_data->pipes.msg[1] = -1;
    thread_data->pipes.ack[0] = -1;
    thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
    if (pipe(thread_data->pipes.msg))
        return -EINVAL;

    if (pipe(thread_data->pipes.ack)) {
        close(thread_data->pipes.msg[0]);
        thread_data->pipes.msg[0] = -1;
        close(thread_data->pipes.msg[1]);
        thread_data->pipes.msg[1] = -1;
        return -EINVAL;
    }

    pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
              thread_data->pipes.msg[0], thread_data->pipes.msg[1],
              thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

    return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
    if (thread_data->pipes.msg[0] != -1) {
        close(thread_data->pipes.msg[0]);
        thread_data->pipes.msg[0] = -1;
    }
    if (thread_data->pipes.msg[1] != -1) {
        close(thread_data->pipes.msg[1]);
        thread_data->pipes.msg[1] = -1;
    }
    if (thread_data->pipes.ack[0] != -1) {
        close(thread_data->pipes.ack[0]);
        thread_data->pipes.ack[0] = -1;
    }
    if (thread_data->pipes.ack[1] != -1) {
        close(thread_data->pipes.ack[1]);
        thread_data->pipes.ack[1] = -1;
    }
}

static bool evlist__per_thread(struct evlist *evlist)
{
    return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
    int m, tm, nr_mmaps = evlist->core.nr_mmaps;
    struct mmap *mmap = evlist->mmap;
    struct mmap *overwrite_mmap = evlist->overwrite_mmap;
    struct perf_cpu_map *cpus = evlist->core.all_cpus;
    bool per_thread = evlist__per_thread(evlist);

    if (per_thread)
        thread_data->nr_mmaps = nr_mmaps;
    else
        thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
                                              thread_data->mask->maps.nbits);
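    /*
     * Example (hypothetical mask): if this thread's maps mask has only
     * bits 2 and 3 set, bitmap_weight() yields nr_mmaps == 2 and the
     * loop below wires thread_data->maps[0..1] to the mmaps of CPUs 2
     * and 3 alone.
     */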
    thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
    if (!thread_data->maps)
        return -ENOMEM;

    if (overwrite_mmap) {
        thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
        if (!thread_data->overwrite_maps) {
            zfree(&thread_data->maps);
            return -ENOMEM;
        }
    }

    pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
              thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

    for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
        if (per_thread ||
            test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
            if (thread_data->maps) {
                thread_data->maps[tm] = &mmap[m];
                pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
                          thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
            }
            if (thread_data->overwrite_maps) {
                thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
                pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
                          thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
            }
            tm++;
        }
    }

    return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
    int f, tm, pos;
    struct mmap *map, *overwrite_map;

    fdarray__init(&thread_data->pollfd, 64);

    for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
        map = thread_data->maps ? thread_data->maps[tm] : NULL;
        overwrite_map = thread_data->overwrite_maps ?
                        thread_data->overwrite_maps[tm] : NULL;

        for (f = 0; f < evlist->core.pollfd.nr; f++) {
            void *ptr = evlist->core.pollfd.priv[f].ptr;

            if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
                pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
                                              &evlist->core.pollfd);
                if (pos < 0)
                    return pos;
                pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
                          thread_data, pos, evlist->core.pollfd.entries[f].fd);
            }
        }
    }

    return 0;
}

static void record__free_thread_data(struct record *rec)
{
    int t;
    struct record_thread *thread_data = rec->thread_data;

    if (thread_data == NULL)
        return;

    for (t = 0; t < rec->nr_threads; t++) {
        record__thread_data_close_pipes(&thread_data[t]);
        zfree(&thread_data[t].maps);
        zfree(&thread_data[t].overwrite_maps);
        fdarray__exit(&thread_data[t].pollfd);
    }

    zfree(&rec->thread_data);
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
    int t, ret;
    struct record_thread *thread_data;

    rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
    if (!rec->thread_data) {
        pr_err("Failed to allocate thread data\n");
        return -ENOMEM;
    }
    thread_data = rec->thread_data;

    for (t = 0; t < rec->nr_threads; t++)
        record__thread_data_init_pipes(&thread_data[t]);

    for (t = 0; t < rec->nr_threads; t++) {
        thread_data[t].rec = rec;
        thread_data[t].mask = &rec->thread_masks[t];
        ret = record__thread_data_init_maps(&thread_data[t], evlist);
        if (ret) {
            pr_err("Failed to initialize thread[%d] maps\n", t);
            goto out_free;
        }
        ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
        if (ret) {
            pr_err("Failed to initialize thread[%d] pollfd\n", t);
            goto out_free;
        }
        if (t) {
            thread_data[t].tid = -1;
            ret = record__thread_data_open_pipes(&thread_data[t]);
            if (ret) {
                pr_err("Failed to open thread[%d] communication pipes\n", t);
                goto out_free;
            }
            ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
                               POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
            if (ret < 0) {
                pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
                goto out_free;
            }
            thread_data[t].ctlfd_pos = ret;
            pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
                      thread_data, thread_data[t].ctlfd_pos,
                      thread_data[t].pipes.msg[0]);
        } else {
            thread_data[t].tid = gettid();
            if (evlist->ctl_fd.pos == -1)
                continue;
            ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
                                          &evlist->core.pollfd);
            if (ret < 0) {
                pr_err("Failed to duplicate descriptor in main thread pollfd\n");
                goto out_free;
            }
            thread_data[t].ctlfd_pos = ret;
            pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
                      thread_data, thread_data[t].ctlfd_pos,
                      evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
        }
    }

    return 0;

out_free:
    record__free_thread_data(rec);

    return ret;
}

static int record__mmap_evlist(struct record *rec,
                               struct evlist *evlist)
{
    int i, ret;
    struct record_opts *opts = &rec->opts;
    bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
                              opts->auxtrace_sample_mode;
    char msg[512];

    if (opts->affinity != PERF_AFFINITY_SYS)
        cpu__setup_cpunode_map();

    if (evlist__mmap_ex(evlist, opts->mmap_pages,
                        opts->auxtrace_mmap_pages,
                        auxtrace_overwrite,
                        opts->nr_cblocks, opts->affinity,
                        opts->mmap_flush, opts->comp_level) < 0) {
        if (errno == EPERM) {
            pr_err("Permission error mapping pages.\n"
                   "Consider increasing "
                   "/proc/sys/kernel/perf_event_mlock_kb,\n"
                   "or try again with a smaller value of -m/--mmap_pages.\n"
                   "(current value: %u,%u)\n",
                   opts->mmap_pages, opts->auxtrace_mmap_pages);
            return -errno;
        } else {
            pr_err("failed to mmap with %d (%s)\n", errno,
                   str_error_r(errno, msg, sizeof(msg)));
            if (errno)
                return -errno;
            else
                return -EINVAL;
        }
    }

    if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
        return -1;

    ret = record__alloc_thread_data(rec, evlist);
    if (ret)
        return ret;

    if (record__threads_enabled(rec)) {
        ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
        if (ret) {
            pr_err("Failed to create data directory: %s\n", strerror(-ret));
            return ret;
        }
        for (i = 0; i < evlist->core.nr_mmaps; i++) {
            if (evlist->mmap)
                evlist->mmap[i].file = &rec->data.dir.files[i];
            if (evlist->overwrite_mmap)
                evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
        }
    }

    return 0;
}

static int record__mmap(struct record *rec)
{
    return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
    char msg[BUFSIZ];
    struct evsel *pos;
    struct evlist *evlist = rec->evlist;
    struct perf_session *session = rec->session;
    struct record_opts *opts = &rec->opts;
    int rc = 0;

    /*
     * For initial_delay, system wide or a hybrid system, we need to add a
     * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
     * of waiting or event synthesis.
     */
    if (opts->initial_delay || target__has_cpu(&opts->target) ||
        perf_pmu__has_hybrid()) {
        pos = evlist__get_tracking_event(evlist);
        if (!evsel__is_dummy_event(pos)) {
            /* Set up dummy event. */
            if (evlist__add_dummy(evlist))
                return -ENOMEM;
            pos = evlist__last(evlist);
            evlist__set_tracking_event(evlist, pos);
        }

        /*
         * Enable the dummy event when the process is forked for
         * initial_delay, immediately for system wide.
         */
        if (opts->initial_delay && !pos->immediate &&
            !target__has_cpu(&opts->target))
            pos->core.attr.enable_on_exec = 1;
        else
            pos->immediate = 1;
    }

    evlist__config(evlist, opts, &callchain_param);

    evlist__for_each_entry(evlist, pos) {
try_again:
        if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
            if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
                if (verbose > 0)
                    ui__warning("%s\n", msg);
                goto try_again;
            }
            if ((errno == EINVAL || errno == EBADF) &&
                pos->core.leader != &pos->core &&
                pos->weak_group) {
                pos = evlist__reset_weak_group(evlist, pos, true);
                goto try_again;
            }
            rc = -errno;
            evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
            ui__error("%s\n", msg);
            goto out;
        }

        pos->supported = true;
    }

    if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
        pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
    }

    if (evlist__apply_filters(evlist, &pos)) {
        pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
               pos->filter, evsel__name(pos), errno,
               str_error_r(errno, msg, sizeof(msg)));
        rc = -1;
        goto out;
    }

    rc = record__mmap(rec);
    if (rc)
        goto out;

    session->evlist = evlist;
    perf_session__set_id_hdr_size(session);
out:
    return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
    if (rec->evlist->first_sample_time == 0)
        rec->evlist->first_sample_time = sample_time;

    if (sample_time)
        rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct evsel *evsel,
                                struct machine *machine)
{
    struct record *rec = container_of(tool, struct record, tool);

    set_timestamp_boundary(rec, sample->time);

    if (rec->buildid_all)
        return 0;

    rec->samples++;
    return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
    struct perf_session *session = rec->session;

    if (perf_data__size(&rec->data) == 0)
        return 0;

    /*
     * During this process, it'll load the kernel map and replace
     * dso->long_name with a real pathname it found. In this case
     * we prefer the vmlinux path like
     *   /lib/modules/3.16.4/build/vmlinux
     *
     * rather than the build-id path (in the debug directory):
     *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
     */
    symbol_conf.ignore_vmlinux_buildid = true;

    /*
     * If --buildid-all is given, it marks all DSOs regardless of hits,
     * so no need to process samples. But if timestamp_boundary is enabled,
     * it still needs to walk all samples to get the timestamps of the
     * first/last samples.
     */
    if (rec->buildid_all && !rec->timestamp_boundary)
        rec->tool.sample = NULL;

    return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
    int err;
    struct perf_tool *tool = data;
    /*
     * As for the guest kernel when processing the record & report
     * subcommands, we arrange the module mmaps prior to the guest kernel
     * mmap and trigger a dso preload, because default guest module
     * symbols are loaded from guest kallsyms instead of
     * /lib/modules/XXX/XXX. This method is used to avoid missing symbols
     * when the first address is in a module instead of in the guest
     * kernel.
     */
    err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                         machine);
    if (err < 0)
        pr_err("Couldn't record guest kernel [%d]'s reference"
               " relocation symbol.\n", machine->pid);

    /*
     * We use _stext for the guest kernel because the guest kernel's
     * /proc/kallsyms sometimes has no _text.
     */
    err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                             machine);
    if (err < 0)
        pr_err("Couldn't record guest kernel [%d]'s reference"
               " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
    .size = sizeof(struct perf_event_header),
    .type = PERF_RECORD_FINISHED_ROUND,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
    if (rec->opts.affinity != PERF_AFFINITY_SYS &&
        !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
                      thread->mask->affinity.nbits)) {
        bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
        bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
                  map->affinity_mask.bits, thread->mask->affinity.nbits);
        sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
                          (cpu_set_t *)thread->mask->affinity.bits);
        if (verbose == 2) {
            pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
            mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
        }
    }
}

static size_t process_comp_header(void *record, size_t increment)
{
    struct perf_record_compressed *event = record;
    size_t size = sizeof(*event);

    if (increment) {
        event->header.size += increment;
        return increment;
    }

    event->header.type = PERF_RECORD_COMPRESSED;
    event->header.size = size;

    return size;
}
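/*
 * A sketch of how the callback above is used (inferred from its two
 * branches, not a statement of the zstd helper's internals):
 * zstd_compress_stream_to_records() first calls it with increment == 0
 * to initialize a PERF_RECORD_COMPRESSED header in the output buffer,
 * then calls it again after each compressed chunk so header.size grows
 * to cover the payload that follows it.
 */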
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
                            void *dst, size_t dst_size, void *src, size_t src_size)
{
    size_t compressed;
    size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
    struct zstd_data *zstd_data = &session->zstd_data;

    if (map && map->file)
        zstd_data = &map->zstd_data;

    compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
                                                 max_record_size, process_comp_header);

    if (map && map->file) {
        thread->bytes_transferred += src_size;
        thread->bytes_compressed += compressed;
    } else {
        session->bytes_transferred += src_size;
        session->bytes_compressed += compressed;
    }

    return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
                                    bool overwrite, bool synch)
{
    u64 bytes_written = rec->bytes_written;
    int i;
    int rc = 0;
    int nr_mmaps;
    struct mmap **maps;
    int trace_fd = rec->data.file.fd;
    off_t off = 0;

    if (!evlist)
        return 0;

    nr_mmaps = thread->nr_mmaps;
    maps = overwrite ? thread->overwrite_maps : thread->maps;

    if (!maps)
        return 0;

    if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
        return 0;

    if (record__aio_enabled(rec))
        off = record__aio_get_pos(trace_fd);

    for (i = 0; i < nr_mmaps; i++) {
        u64 flush = 0;
        struct mmap *map = maps[i];

        if (map->core.base) {
            record__adjust_affinity(rec, map);
            if (synch) {
                flush = map->core.flush;
                map->core.flush = 1;
            }
            if (!record__aio_enabled(rec)) {
                if (perf_mmap__push(map, rec, record__pushfn) < 0) {
                    if (synch)
                        map->core.flush = flush;
                    rc = -1;
                    goto out;
                }
            } else {
                if (record__aio_push(rec, map, &off) < 0) {
                    record__aio_set_pos(trace_fd, off);
                    if (synch)
                        map->core.flush = flush;
                    rc = -1;
                    goto out;
                }
            }
            if (synch)
                map->core.flush = flush;
        }

        if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
            !rec->opts.auxtrace_sample_mode &&
            record__auxtrace_mmap_read(rec, map) != 0) {
            rc = -1;
            goto out;
        }
    }

    if (record__aio_enabled(rec))
        record__aio_set_pos(trace_fd, off);

    /*
     * Mark the round finished in case we wrote
     * at least one event.
     *
     * No need for round events in directory mode,
     * because per-cpu maps and files have data
     * sorted by kernel.
     */
    if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
        rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

    if (overwrite)
        evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
    return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
    int err;

    err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
    if (err)
        return err;

    return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
                                           void *arg __maybe_unused)
{
    struct perf_mmap *map = fda->priv[fd].ptr;

    if (map)
        perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
    enum thread_msg msg = THREAD_MSG__READY;
    bool terminate = false;
    struct fdarray *pollfd;
    int err, ctlfd_pos;

    thread = arg;
    thread->tid = gettid();

    err = write(thread->pipes.ack[1], &msg, sizeof(msg));
    if (err == -1)
        pr_warning("threads[%d]: failed to notify on start: %s\n",
                   thread->tid, strerror(errno));

    pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

    pollfd = &thread->pollfd;
    ctlfd_pos = thread->ctlfd_pos;

    for (;;) {
        unsigned long long hits = thread->samples;

        if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
            break;

        if (hits == thread->samples) {
            err = fdarray__poll(pollfd, -1);
            /*
             * Propagate an error only if there is one; ignore a
             * positive number of returned events and EINTR.
             */
            if (err > 0 || (err < 0 && errno == EINTR))
                err = 0;
            thread->waking++;

            if (fdarray__filter(pollfd, POLLERR | POLLHUP,
                                record__thread_munmap_filtered, NULL) == 0)
                break;
        }

        if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
            terminate = true;
            close(thread->pipes.msg[0]);
            thread->pipes.msg[0] = -1;
            pollfd->entries[ctlfd_pos].fd = -1;
            pollfd->entries[ctlfd_pos].events = 0;
        }

        pollfd->entries[ctlfd_pos].revents = 0;
    }
    record__mmap_read_all(thread->rec, true);

    err = write(thread->pipes.ack[1], &msg, sizeof(msg));
    if (err == -1)
        pr_warning("threads[%d]: failed to notify on termination: %s\n",
                   thread->tid, strerror(errno));

    return NULL;
}

static void record__init_features(struct record *rec)
{
    struct perf_session *session = rec->session;
    int feat;

    for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
        perf_header__set_feat(&session->header, feat);

    if (rec->no_buildid)
        perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

    if (!have_tracepoints(&rec->evlist->core.entries))
        perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

    if (!rec->opts.branch_stack)
        perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

    if (!rec->opts.full_auxtrace)
        perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

    if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
        perf_header__clear_feat(&session->header, HEADER_CLOCKID);

    if (!rec->opts.use_clockid)
        perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

    if (!record__threads_enabled(rec))
        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

    if (!record__comp_enabled(rec))
        perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

    perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
    int i;
    struct perf_data *data = &rec->data;
    int fd = perf_data__fd(data);

    if (data->is_pipe)
        return;

    rec->session->header.data_size += rec->bytes_written;
    data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
    if (record__threads_enabled(rec)) {
        for (i = 0; i < data->dir.nr; i++)
            data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
    }

    if (!rec->no_buildid) {
        process_buildids(rec);

        if (rec->buildid_all)
            dsos__hit_all(rec->session);
    }
    perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
    int err;
    struct perf_thread_map *thread_map;
    bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

    if (rec->opts.tail_synthesize != tail)
        return 0;

    thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
    if (thread_map == NULL)
        return -1;

    err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                            process_synthesized_event,
                                            &rec->session->machines.host,
                                            needs_mmap,
                                            rec->opts.sample_address);
    perf_thread_map__put(thread_map);
    return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
    struct perf_data *data = &rec->data;
    int fd, err;
    char *new_filename;

    /* Same size: "2015122520103046" */
    char timestamp[] = "InvalidTimestamp";

    record__aio_mmap_read_sync(rec);

    record__synthesize(rec, true);
    if (target__none(&rec->opts.target))
        record__synthesize_workload(rec, true);

    rec->samples = 0;
    record__finish_output(rec);
    err = fetch_current_timestamp(timestamp, sizeof(timestamp));
    if (err) {
        pr_err("Failed to get current timestamp\n");
        return -EINVAL;
    }

    fd = perf_data__switch(data, timestamp,
                           rec->session->header.data_offset,
                           at_exit, &new_filename);
    if (fd >= 0 && !at_exit) {
        rec->bytes_written = 0;
        rec->session->header.data_size = 0;
    }

    if (!quiet)
        fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                data->path, timestamp);

    if (rec->switch_output.num_files) {
        int n = rec->switch_output.cur_file + 1;

        if (n >= rec->switch_output.num_files)
            n = 0;
        rec->switch_output.cur_file = n;
        if (rec->switch_output.filenames[n]) {
            remove(rec->switch_output.filenames[n]);
            zfree(&rec->switch_output.filenames[n]);
        }
        rec->switch_output.filenames[n] = new_filename;
    } else {
        free(new_filename);
    }
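    /*
     * Rotation example (assumed command line): with --switch-output
     * --switch-max-files=3, cur_file cycles 0 -> 1 -> 2 -> 0; before a
     * slot is reused, its old perf.data.<timestamp> file is remove()d
     * and the name freed, so at most three rotated files are kept.
     */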
    /* Output tracking events */
    if (!at_exit) {
        record__synthesize(rec, false);

        /*
         * In 'perf record --switch-output' without -a,
         * record__synthesize() in record__switch_output() won't
         * generate tracking events because there's no thread_map
         * in the evlist, which means the newly created perf.data
         * doesn't contain map and comm information.
         * Create a fake thread_map and directly call
         * perf_event__synthesize_thread_map() for those events.
         */
        if (target__none(&rec->opts.target))
            record__synthesize_workload(rec, false);
    }
    return fd;
}

static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
    workload_exec_errno = info->si_value.sival_int;
    done = 1;
    child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
    if (evlist) {
        if (evlist->mmap && evlist->mmap[0].core.base)
            return evlist->mmap[0].core.base;
        if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
            return evlist->overwrite_mmap[0].core.base;
    }
    return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
    const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);

    if (pc)
        return pc;
    return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
    struct perf_session *session = rec->session;
    struct machine *machine = &session->machines.host;
    struct perf_data *data = &rec->data;
    struct record_opts *opts = &rec->opts;
    struct perf_tool *tool = &rec->tool;
    int err = 0;
    event_op f = process_synthesized_event;

    if (rec->opts.tail_synthesize != tail)
        return 0;

    if (data->is_pipe) {
        err = perf_event__synthesize_for_pipe(tool, session, data,
                                              process_synthesized_event);
        if (err < 0)
            goto out;

        rec->bytes_written += err;
    }

    err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                      process_synthesized_event, machine);
    if (err)
        goto out;

    /* Synthesize id_index before auxtrace_info */
    if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
        err = perf_event__synthesize_id_index(tool,
                                              process_synthesized_event,
                                              session->evlist, machine);
        if (err)
            goto out;
    }

    if (rec->opts.full_auxtrace) {
        err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                                   session, process_synthesized_event);
        if (err)
            goto out;
    }

    if (!evlist__exclude_kernel(rec->evlist)) {
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                  "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                  "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                  "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                  "Check /proc/modules permission or run as root.\n");
    }

    if (perf_guest) {
        machines__process_guests(&session->machines,
                                 perf_event__synthesize_guest_os, tool);
    }

    err = perf_event__synthesize_extra_attr(&rec->tool,
                                            rec->evlist,
                                            process_synthesized_event,
                                            data->is_pipe);
    if (err)
        goto out;

    err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
                                             process_synthesized_event,
                                             NULL);
    if (err < 0) {
        pr_err("Couldn't synthesize thread map.\n");
        return err;
    }

    err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
                                         process_synthesized_event, NULL);
    if (err < 0) {
        pr_err("Couldn't synthesize cpu map.\n");
        return err;
    }

    err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
                                            machine, opts);
    if (err < 0)
        pr_warning("Couldn't synthesize bpf events.\n");

    if (rec->opts.synth & PERF_SYNTH_CGROUP) {
        err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
            pr_warning("Couldn't synthesize cgroup events.\n");
    }

    if (rec->opts.nr_threads_synthesize > 1) {
        perf_set_multithreaded();
        f = process_locked_synthesized_event;
    }

    if (rec->opts.synth & PERF_SYNTH_TASK) {
        bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

        err = __machine__synthesize_threads(machine, tool, &opts->target,
                                            rec->evlist->core.threads,
                                            f, needs_mmap, opts->sample_address,
                                            rec->opts.nr_threads_synthesize);
    }

    if (rec->opts.nr_threads_synthesize > 1)
        perf_set_singlethreaded();

out:
    return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
    struct record *rec = data;

    pthread_kill(rec->thread_id, SIGUSR2);
    return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
    struct record_opts *opts = &rec->opts;

    if (rec->sb_evlist != NULL) {
        /*
         * We get here if --switch-output-event populated the
         * sb_evlist, so associate a callback that will send a SIGUSR2
         * to the main thread.
         */
        evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
        rec->thread_id = pthread_self();
    }
#ifdef HAVE_LIBBPF_SUPPORT
    if (!opts->no_bpf_event) {
        if (rec->sb_evlist == NULL) {
            rec->sb_evlist = evlist__new();

            if (rec->sb_evlist == NULL) {
                pr_err("Couldn't create side band evlist.\n");
                return -1;
            }
        }

        if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
            pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
            return -1;
        }
    }
#endif
    if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
        pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
        opts->no_bpf_event = true;
    }

    return 0;
}

static int record__init_clock(struct record *rec)
{
    struct perf_session *session = rec->session;
    struct timespec ref_clockid;
    struct timeval ref_tod;
    u64 ref;

    if (!rec->opts.use_clockid)
        return 0;

    if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
        session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

    session->header.env.clock.clockid = rec->opts.clockid;

    if (gettimeofday(&ref_tod, NULL) != 0) {
        pr_err("gettimeofday failed, cannot set reference time.\n");
        return -1;
    }

    if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
        pr_err("clock_gettime failed, cannot set reference time.\n");
        return -1;
    }

    ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
          (u64) ref_tod.tv_usec * NSEC_PER_USEC;

    session->header.env.clock.tod_ns = ref;

    ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
          (u64) ref_clockid.tv_nsec;

    session->header.env.clock.clockid_ns = ref;
    return 0;
}
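/*
 * Numeric sketch of the reference pair above (made-up values): if
 * gettimeofday() returns 1000.500000 s, then
 * tod_ns = 1000 * NSEC_PER_SEC + 500000 * NSEC_PER_USEC = 1000500000000.
 * clockid_ns captures the same instant on the session clock, so a
 * consumer can later map a sample timestamp t taken on that clock to
 * wall-clock time as t + (tod_ns - clockid_ns).
 */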
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
    if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
        trigger_hit(&auxtrace_snapshot_trigger);
        auxtrace_record__snapshot_started = 1;
        if (auxtrace_record__snapshot_start(rec->itr))
            trigger_error(&auxtrace_snapshot_trigger);
    }
}

static void record__uniquify_name(struct record *rec)
{
    struct evsel *pos;
    struct evlist *evlist = rec->evlist;
    char *new_name;
    int ret;

    if (!perf_pmu__has_hybrid())
        return;

    evlist__for_each_entry(evlist, pos) {
        if (!evsel__is_hybrid(pos))
            continue;
        if (strchr(pos->name, '/'))
            continue;
        ret = asprintf(&new_name, "%s/%s/",
                       pos->pmu_name, pos->name);
        if (ret > 0) {
            free(pos->name);
            pos->name = new_name;
        }
    }
}
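/*
 * Example on a hybrid system (assumed PMU names): an event recorded as
 * "cycles" on the "cpu_core" PMU is renamed to "cpu_core/cycles/",
 * while a name that already contains '/' (an explicit PMU spec) is
 * left untouched by the strchr() check above.
 */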
static int record__terminate_thread(struct record_thread *thread_data)
{
    int err;
    enum thread_msg ack = THREAD_MSG__UNDEFINED;
    pid_t tid = thread_data->tid;

    close(thread_data->pipes.msg[1]);
    thread_data->pipes.msg[1] = -1;
    err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
    if (err > 0)
        pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
    else
        pr_warning("threads[%d]: failed to receive termination notification from %d\n",
                   thread->tid, tid);

    return 0;
}

static int record__start_threads(struct record *rec)
{
    int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
    struct record_thread *thread_data = rec->thread_data;
    sigset_t full, mask;
    pthread_t handle;
    pthread_attr_t attrs;

    thread = &thread_data[0];

    if (!record__threads_enabled(rec))
        return 0;

    sigfillset(&full);
    if (sigprocmask(SIG_SETMASK, &full, &mask)) {
        pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
        return -1;
    }

    pthread_attr_init(&attrs);
    pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);

    for (t = 1; t < nr_threads; t++) {
        enum thread_msg msg = THREAD_MSG__UNDEFINED;

#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
        pthread_attr_setaffinity_np(&attrs,
                                    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
                                    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
        if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
            for (tt = 1; tt < t; tt++)
                record__terminate_thread(&thread_data[tt]);
            pr_err("Failed to start threads: %s\n", strerror(errno));
            ret = -1;
            goto out_err;
        }

        err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
        if (err > 0)
            pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
                      thread_msg_tags[msg]);
        else
            pr_warning("threads[%d]: failed to receive start notification from %d\n",
                       thread->tid, rec->thread_data[t].tid);
    }

    sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
                      (cpu_set_t *)thread->mask->affinity.bits);

    pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

out_err:
    pthread_attr_destroy(&attrs);

    if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
        pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
        ret = -1;
    }

    return ret;
}

static int record__stop_threads(struct record *rec)
{
    int t;
    struct record_thread *thread_data = rec->thread_data;

    for (t = 1; t < rec->nr_threads; t++)
        record__terminate_thread(&thread_data[t]);

    for (t = 0; t < rec->nr_threads; t++) {
        rec->samples += thread_data[t].samples;
        if (!record__threads_enabled(rec))
            continue;
        rec->session->bytes_transferred += thread_data[t].bytes_transferred;
        rec->session->bytes_compressed += thread_data[t].bytes_compressed;
        pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
                 thread_data[t].samples, thread_data[t].waking);
        if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
            pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
                     thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
        else
            pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
    }

    return 0;
}

static unsigned long record__waking(struct record *rec)
{
    int t;
    unsigned long waking = 0;
    struct record_thread *thread_data = rec->thread_data;

    for (t = 0; t < rec->nr_threads; t++)
        waking += thread_data[t].waking;

    return waking;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
    int err;
    int status = 0;
    const bool forks = argc > 0;
    struct perf_tool *tool = &rec->tool;
    struct record_opts *opts = &rec->opts;
    struct perf_data *data = &rec->data;
    struct perf_session *session;
    bool disabled = false, draining = false;
    int fd;
    float ratio = 0;
    enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;

    atexit(record__sig_exit);
    signal(SIGCHLD, sig_handler);
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGSEGV, sigsegv_handler);

    if (rec->opts.record_namespaces)
        tool->namespace_events = true;

    if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
        tool->cgroup_events = true;
#else
        pr_err("cgroup tracking is not supported\n");
        return -1;
#endif
    }

    if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
        signal(SIGUSR2, snapshot_sig_handler);
        if (rec->opts.auxtrace_snapshot_mode)
            trigger_on(&auxtrace_snapshot_trigger);
        if (rec->switch_output.enabled)
            trigger_on(&switch_output_trigger);
    } else {
        signal(SIGUSR2, SIG_IGN);
    }

    session = perf_session__new(data, tool);
    if (IS_ERR(session)) {
        pr_err("Perf session creation failed.\n");
        return PTR_ERR(session);
    }

    if (record__threads_enabled(rec)) {
        if (perf_data__is_pipe(&rec->data)) {
            pr_err("Parallel trace streaming is not available in pipe mode.\n");
            return -1;
        }
        if (rec->opts.full_auxtrace) {
            pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
            return -1;
        }
    }

    fd = perf_data__fd(data);
    rec->session = session;

    if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
        pr_err("Compression initialization failed.\n");
        return -1;
    }
#ifdef HAVE_EVENTFD_SUPPORT
    done_fd = eventfd(0, EFD_NONBLOCK);
    if (done_fd < 0) {
        pr_err("Failed to create wakeup eventfd, error: %m\n");
        status = -1;
        goto out_delete_session;
    }
    err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
    if (err < 0) {
        pr_err("Failed to add wakeup eventfd to poll list\n");
        status = err;
        goto out_delete_session;
    }
#endif // HAVE_EVENTFD_SUPPORT

    session->header.env.comp_type = PERF_COMP_ZSTD;
    session->header.env.comp_level = rec->opts.comp_level;

    if (rec->opts.kcore &&
        !record__kcore_readable(&session->machines.host)) {
        pr_err("ERROR: kcore is not readable.\n");
        return -1;
    }

    if (record__init_clock(rec))
        return -1;

    record__init_features(rec);

    if (forks) {
        err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
                                       workload_exec_failed_signal);
        if (err < 0) {
            pr_err("Couldn't run the workload!\n");
            status = err;
            goto out_delete_session;
        }
    }

    /*
     * If we have just a single event and are sending data
     * through a pipe, we need to force the id allocation,
     * because we synthesize the event name through the pipe
     * and need the id for that.
     */
    if (data->is_pipe && rec->evlist->core.nr_entries == 1)
        rec->opts.sample_id = true;

    record__uniquify_name(rec);

    if (record__open(rec) != 0) {
        err = -1;
        goto out_free_threads;
    }
    session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

    if (rec->opts.kcore) {
        err = record__kcore_copy(&session->machines.host, data);
        if (err) {
            pr_err("ERROR: Failed to copy kcore\n");
            goto out_free_threads;
        }
    }

    err = bpf__apply_obj_config();
    if (err) {
        char errbuf[BUFSIZ];

        bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
        pr_err("ERROR: Apply config to BPF failed: %s\n",
               errbuf);
        goto out_free_threads;
    }

    /*
     * Normally perf_session__new would do this, but it doesn't have the
     * evlist.
     */
    if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
        pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
        rec->tool.ordered_events = false;
    }

    if (!rec->evlist->core.nr_groups)
        perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

    if (data->is_pipe) {
        err = perf_header__write_pipe(fd);
        if (err < 0)
            goto out_free_threads;
    } else {
        err = perf_session__write_header(session, rec->evlist, fd, false);
        if (err < 0)
            goto out_free_threads;
    }

    err = -1;
    if (!rec->no_buildid
        && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
        pr_err("Couldn't generate buildids. "
               "Use --no-buildid to profile anyway.\n");
        goto out_free_threads;
    }

    err = record__setup_sb_evlist(rec);
    if (err)
        goto out_free_threads;

    err = record__synthesize(rec, false);
    if (err < 0)
        goto out_free_threads;

    if (rec->realtime_prio) {
        struct sched_param param;

        param.sched_priority = rec->realtime_prio;
        if (sched_setscheduler(0, SCHED_FIFO, &param)) {
            pr_err("Could not set realtime priority.\n");
            err = -1;
            goto out_free_threads;
        }
    }

    if (record__start_threads(rec))
        goto out_free_threads;

    /*
     * When perf is starting the traced process, all the events
     * (apart from group members) have enable_on_exec=1 set,
     * so don't spoil it by prematurely enabling them.
     */
    if (!target__none(&opts->target) && !opts->initial_delay)
        evlist__enable(rec->evlist);

    /*
     * Let the child rip
     */
    if (forks) {
        struct machine *machine = &session->machines.host;
        union perf_event *event;
        pid_t tgid;

        event = malloc(sizeof(event->comm) + machine->id_hdr_size);
        if (event == NULL) {
            err = -ENOMEM;
            goto out_child;
        }

        /*
         * Some H/W events are generated before the COMM event,
         * which is emitted during exec(), so perf script
         * cannot see a correct process name for those events.
         * Synthesize a COMM event to prevent it.
         */
        tgid = perf_event__synthesize_comm(tool, event,
                                           rec->evlist->workload.pid,
                                           process_synthesized_event,
                                           machine);
        free(event);

        if (tgid == -1)
            goto out_child;

        event = malloc(sizeof(event->namespaces) +
                       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                       machine->id_hdr_size);
        if (event == NULL) {
            err = -ENOMEM;
            goto out_child;
        }

        /*
         * Synthesize the NAMESPACES event for the command specified.
         */
        perf_event__synthesize_namespaces(tool, event,
                                          rec->evlist->workload.pid,
                                          tgid, process_synthesized_event,
                                          machine);
        free(event);

        evlist__start_workload(rec->evlist);
    }

    if (opts->initial_delay) {
        pr_info(EVLIST_DISABLED_MSG);
        if (opts->initial_delay > 0) {
            usleep(opts->initial_delay * USEC_PER_MSEC);
            evlist__enable(rec->evlist);
            pr_info(EVLIST_ENABLED_MSG);
        }
    }
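    /*
     * Example (assumed command line): 'perf record --delay=500 -- cmd'
     * sets initial_delay to 500 ms, so counting starts half a second
     * after the workload is launched; a negative value such as
     * --delay=-1 leaves the events disabled until an external control
     * command enables them.
     */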
2421 trigger_ready(&auxtrace_snapshot_trigger);
2422 trigger_ready(&switch_output_trigger);
2423 perf_hooks__invoke_record_start();
2425 unsigned long long hits = thread->samples;
2428 * rec->evlist->bkw_mmap_state is possible to be
2429 * BKW_MMAP_EMPTY here: when done == true and
2430 * hits != rec->samples in previous round.
2432 * evlist__toggle_bkw_mmap ensure we never
2433 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2435 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2436 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2438 if (record__mmap_read_all(rec, false) < 0) {
2439 trigger_error(&auxtrace_snapshot_trigger);
2440 trigger_error(&switch_output_trigger);
2445 if (auxtrace_record__snapshot_started) {
2446 auxtrace_record__snapshot_started = 0;
2447 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2448 record__read_auxtrace_snapshot(rec, false);
2449 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2450 pr_err("AUX area tracing snapshot failed\n");
2456 if (trigger_is_hit(&switch_output_trigger)) {
2458 * If switch_output_trigger is hit, the data in the
2459 * overwritable ring buffer should have been collected,
2460 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2462 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2463 * it may not have collected data from the
2464 * overwritable ring buffer. Read again.
2466 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2468 trigger_ready(&switch_output_trigger);
2471 * Re-enable events in the overwrite ring buffer after
2472 * record__mmap_read_all(): we should have collected data from it.
2475 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2478 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2479 record__waking(rec));
2481 fd = record__switch_output(rec, false);
2483 pr_err("Failed to switch to new file\n");
2484 trigger_error(&switch_output_trigger);
2489 /* re-arm the alarm */
2490 if (rec->switch_output.time)
2491 alarm(rec->switch_output.time);
2494 if (hits == thread->samples) {
2495 if (done || draining)
2497 err = fdarray__poll(&thread->pollfd, -1);
2499 * Propagate an error only if there is one. Ignore a positive
2500 * number of returned events and interrupt errors (EINTR).
2502 if (err > 0 || (err < 0 && errno == EINTR))
2506 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2507 record__thread_munmap_filtered, NULL) == 0)
2510 evlist__ctlfd_update(rec->evlist,
2511 &thread->pollfd.entries[thread->ctlfd_pos]);
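/*
 * Commands arriving on the --control descriptor are handled below; e.g.
 * with "--control=fifo:ctl,ack" a shell can drive the session via
 * "echo enable > ctl", "echo disable > ctl" or "echo snapshot > ctl".
 */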
2514 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2516 case EVLIST_CTL_CMD_SNAPSHOT:
2517 hit_auxtrace_snapshot_trigger(rec);
2518 evlist__ctlfd_ack(rec->evlist);
2520 case EVLIST_CTL_CMD_STOP:
2523 case EVLIST_CTL_CMD_ACK:
2524 case EVLIST_CTL_CMD_UNSUPPORTED:
2525 case EVLIST_CTL_CMD_ENABLE:
2526 case EVLIST_CTL_CMD_DISABLE:
2527 case EVLIST_CTL_CMD_EVLIST:
2528 case EVLIST_CTL_CMD_PING:
2535 * When perf started the traced process, the events die with
2536 * it at the end and we wait for that, so there is no need to
2537 * disable the events in this case.
2539 if (done && !disabled && !target__none(&opts->target)) {
2540 trigger_off(&auxtrace_snapshot_trigger);
2541 evlist__disable(rec->evlist);
2546 trigger_off(&auxtrace_snapshot_trigger);
2547 trigger_off(&switch_output_trigger);
2549 if (opts->auxtrace_snapshot_on_exit)
2550 record__auxtrace_snapshot_exit(rec);
2552 if (forks && workload_exec_errno) {
2553 char msg[STRERR_BUFSIZE], strevsels[2048];
2554 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2556 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2558 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2559 strevsels, argv[0], emsg);
2565 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2566 record__waking(rec));
2568 if (target__none(&rec->opts.target))
2569 record__synthesize_workload(rec, true);
2572 record__stop_threads(rec);
2573 record__mmap_read_all(rec, true);
2575 record__free_thread_data(rec);
2576 evlist__finalize_ctlfd(rec->evlist);
2577 record__aio_mmap_read_sync(rec);
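/* Store the compression ratio, rounded to the nearest integer, in the header. */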
2579 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2580 ratio = (float)rec->session->bytes_transferred / (float)rec->session->bytes_compressed;
2581 session->header.env.comp_ratio = ratio + 0.5;
2587 if (!child_finished)
2588 kill(rec->evlist->workload.pid, SIGTERM);
2594 else if (WIFEXITED(exit_status))
2595 status = WEXITSTATUS(exit_status);
2596 else if (WIFSIGNALED(exit_status))
2597 signr = WTERMSIG(exit_status);
2602 rec->bytes_written += off_cpu_write(rec->session);
2604 record__synthesize(rec, true);
2605 /* this will be recalculated during process_buildids() */
2609 if (!rec->timestamp_filename) {
2610 record__finish_output(rec);
2612 fd = record__switch_output(rec, true);
2615 goto out_delete_session;
2620 perf_hooks__invoke_record_end();
2622 if (!err && !quiet) {
2624 const char *postfix = rec->timestamp_filename ?
2625 ".<timestamp>" : "";
2627 if (rec->samples && !rec->opts.full_auxtrace)
2628 scnprintf(samples, sizeof(samples),
2629 " (%" PRIu64 " samples)", rec->samples);
2633 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2634 perf_data__size(data) / 1024.0 / 1024.0,
2635 data->path, postfix, samples);
2637 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2638 rec->session->bytes_transferred / 1024.0 / 1024.0,
2641 fprintf(stderr, " ]\n");
2645 #ifdef HAVE_EVENTFD_SUPPORT
2649 zstd_fini(&session->zstd_data);
2650 perf_session__delete(session);
2652 if (!opts->no_bpf_event)
2653 evlist__stop_sb_thread(rec->sb_evlist);
2657 static void callchain_debug(struct callchain_param *callchain)
2659 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2661 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2663 if (callchain->record_mode == CALLCHAIN_DWARF)
2664 pr_debug("callchain: stack dump size %d\n",
2665 callchain->dump_size);
2668 int record_opts__parse_callchain(struct record_opts *record,
2669 struct callchain_param *callchain,
2670 const char *arg, bool unset)
2673 callchain->enabled = !unset;
2675 /* --no-call-graph */
2677 callchain->record_mode = CALLCHAIN_NONE;
2678 pr_debug("callchain: disabled\n");
2682 ret = parse_callchain_record_opt(arg, callchain);
2684 /* Enable data address sampling for DWARF unwind. */
2685 if (callchain->record_mode == CALLCHAIN_DWARF)
2686 record->sample_address = true;
2687 callchain_debug(callchain);
2693 int record_parse_callchain_opt(const struct option *opt,
2697 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2700 int record_callchain_opt(const struct option *opt,
2701 const char *arg __maybe_unused,
2702 int unset __maybe_unused)
2704 struct callchain_param *callchain = opt->value;
2706 callchain->enabled = true;
2708 if (callchain->record_mode == CALLCHAIN_NONE)
2709 callchain->record_mode = CALLCHAIN_FP;
2711 callchain_debug(callchain);
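/*
 * Example: "perf record --call-graph dwarf,4096" selects CALLCHAIN_DWARF
 * with a 4096 byte stack dump size and, per record_opts__parse_callchain()
 * above, also enables data address sampling; a bare "-g" ends up here and
 * defaults to CALLCHAIN_FP.
 */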
2715 static int perf_record_config(const char *var, const char *value, void *cb)
2717 struct record *rec = cb;
2719 if (!strcmp(var, "record.build-id")) {
2720 if (!strcmp(value, "cache"))
2721 rec->no_buildid_cache = false;
2722 else if (!strcmp(value, "no-cache"))
2723 rec->no_buildid_cache = true;
2724 else if (!strcmp(value, "skip"))
2725 rec->no_buildid = true;
2726 else if (!strcmp(value, "mmap"))
2727 rec->buildid_mmap = true;
2732 if (!strcmp(var, "record.call-graph")) {
2733 var = "call-graph.record-mode";
2734 return perf_default_config(var, value, cb);
2736 #ifdef HAVE_AIO_SUPPORT
2737 if (!strcmp(var, "record.aio")) {
2738 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2739 if (!rec->opts.nr_cblocks)
2740 rec->opts.nr_cblocks = nr_cblocks_default;
2743 if (!strcmp(var, "record.debuginfod")) {
2744 rec->debuginfod.urls = strdup(value);
2745 if (!rec->debuginfod.urls)
2747 rec->debuginfod.set = true;
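/*
 * The keys handled above correspond to ~/.perfconfig entries such as
 * (illustrative values, the URL is a placeholder):
 *
 *	[record]
 *		build-id = cache	; or no-cache, skip, mmap
 *		call-graph = dwarf	; forwarded as call-graph.record-mode
 *		aio = 2			; only with HAVE_AIO_SUPPORT
 *		debuginfod = https://debuginfod.example.com
 */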
2754 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2756 struct record_opts *opts = (struct record_opts *)opt->value;
2761 if (!strcasecmp(str, "node"))
2762 opts->affinity = PERF_AFFINITY_NODE;
2763 else if (!strcasecmp(str, "cpu"))
2764 opts->affinity = PERF_AFFINITY_CPU;
2769 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2771 mask->nbits = nr_bits;
2772 mask->bits = bitmap_zalloc(mask->nbits);
2779 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2781 bitmap_free(mask->bits);
2785 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2789 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2791 mask->affinity.bits = NULL;
2795 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2797 record__mmap_cpu_mask_free(&mask->maps);
2798 mask->maps.bits = NULL;
2804 static void record__thread_mask_free(struct thread_mask *mask)
2806 record__mmap_cpu_mask_free(&mask->maps);
2807 record__mmap_cpu_mask_free(&mask->affinity);
2810 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2813 struct record_opts *opts = opt->value;
2815 if (unset || !str || !strlen(str)) {
2816 opts->threads_spec = THREAD_SPEC__CPU;
2818 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2819 if (s == THREAD_SPEC__USER) {
2820 opts->threads_user_spec = strdup(str);
2821 if (!opts->threads_user_spec)
2823 opts->threads_spec = THREAD_SPEC__USER;
2826 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2827 opts->threads_spec = s;
2833 if (opts->threads_spec == THREAD_SPEC__USER)
2834 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2836 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
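/*
 * Examples: a bare "--threads" defaults to one thread per CPU, while
 * "--threads=core", "--threads=package" or "--threads=numa" group the
 * mmaps by topology; any other string is kept as a user spec and parsed
 * later by record__init_thread_user_masks().
 */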
2841 static int parse_output_max_size(const struct option *opt,
2842 const char *str, int unset)
2844 unsigned long *s = (unsigned long *)opt->value;
2845 static struct parse_tag tags_size[] = {
2846 { .tag = 'B', .mult = 1 },
2847 { .tag = 'K', .mult = 1 << 10 },
2848 { .tag = 'M', .mult = 1 << 20 },
2849 { .tag = 'G', .mult = 1 << 30 },
2859 val = parse_tag_value(str, tags_size);
2860 if (val != (unsigned long) -1) {
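/* e.g. "--max-size=100M" parses to 100 * (1 << 20) bytes here */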
2868 static int record__parse_mmap_pages(const struct option *opt,
2870 int unset __maybe_unused)
2872 struct record_opts *opts = opt->value;
2874 unsigned int mmap_pages;
2889 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2892 opts->mmap_pages = mmap_pages;
2900 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2904 opts->auxtrace_mmap_pages = mmap_pages;
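/*
 * e.g. "-m 512,128" lands here as 512 data pages plus 128 AUX area
 * tracing mmap pages.
 */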
2911 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2915 static int parse_control_option(const struct option *opt,
2917 int unset __maybe_unused)
2919 struct record_opts *opts = opt->value;
2921 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
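/*
 * e.g. "--control=fd:10,11" reuses already opened descriptors, while
 * "--control=fifo:/tmp/ctl,/tmp/ack" has the named pipes opened and used
 * as ctl-fd/ack-fd instead (paths are illustrative).
 */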
2924 static void switch_output_size_warn(struct record *rec)
2926 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2927 struct switch_output *s = &rec->switch_output;
2931 if (s->size < wakeup_size) {
2934 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2935 pr_warning("WARNING: switch-output data size is lower than "
2936 "the wakeup kernel buffer size (%s); "
2937 "expect bigger perf.data sizes\n", buf);
2941 static int switch_output_setup(struct record *rec)
2943 struct switch_output *s = &rec->switch_output;
2944 static struct parse_tag tags_size[] = {
2945 { .tag = 'B', .mult = 1 },
2946 { .tag = 'K', .mult = 1 << 10 },
2947 { .tag = 'M', .mult = 1 << 20 },
2948 { .tag = 'G', .mult = 1 << 30 },
2951 static struct parse_tag tags_time[] = {
2952 { .tag = 's', .mult = 1 },
2953 { .tag = 'm', .mult = 60 },
2954 { .tag = 'h', .mult = 60*60 },
2955 { .tag = 'd', .mult = 60*60*24 },
2961 * If we're using --switch-output-events, then that implies
2962 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2963 * thread to its parent.
2965 if (rec->switch_output_event_set) {
2966 if (record__threads_enabled(rec)) {
2967 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2976 if (record__threads_enabled(rec)) {
2977 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2981 if (!strcmp(s->str, "signal")) {
2984 pr_debug("switch-output with SIGUSR2 signal\n");
2988 val = parse_tag_value(s->str, tags_size);
2989 if (val != (unsigned long) -1) {
2991 pr_debug("switch-output with %s size threshold\n", s->str);
2995 val = parse_tag_value(s->str, tags_time);
2996 if (val != (unsigned long) -1) {
2998 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3006 rec->timestamp_filename = true;
3009 if (s->size && !rec->opts.no_buffering)
3010 switch_output_size_warn(rec);
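/*
 * Examples: "--switch-output" or "--switch-output=signal" rotate the
 * output on SIGUSR2, "--switch-output=100M" after about 100MiB of data,
 * "--switch-output=30s" every 30 seconds; all of them imply
 * --timestamp-filename, set just above.
 */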
3015 static const char * const __record_usage[] = {
3016 "perf record [<options>] [<command>]",
3017 "perf record [<options>] -- <command> [<options>]",
3020 const char * const *record_usage = __record_usage;
3022 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3023 struct perf_sample *sample, struct machine *machine)
3026 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3027 * so there is no need to add them twice.
3029 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3031 return perf_event__process_mmap(tool, event, sample, machine);
3034 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3035 struct perf_sample *sample, struct machine *machine)
3038 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3039 * so there is no need to add them twice.
3041 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3044 return perf_event__process_mmap2(tool, event, sample, machine);
3047 static int process_timestamp_boundary(struct perf_tool *tool,
3048 union perf_event *event __maybe_unused,
3049 struct perf_sample *sample,
3050 struct machine *machine __maybe_unused)
3052 struct record *rec = container_of(tool, struct record, tool);
3054 set_timestamp_boundary(rec, sample->time);
3058 static int parse_record_synth_option(const struct option *opt,
3060 int unset __maybe_unused)
3062 struct record_opts *opts = opt->value;
3063 char *p = strdup(str);
3068 opts->synth = parse_synth_opt(p);
3071 if (opts->synth < 0) {
3072 pr_err("Invalid synth option: %s\n", str);
3079 * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
3080 * because we need access to it in record__exit(), which is called
3081 * after cmd_record() exits, but since record_options needs to be accessible to
3082 * builtin-script, leave it here.
3084 * At least we don't touch it in all the other functions here directly.
3086 * Just say no to tons of global variables, sigh.
3088 static struct record record = {
3090 .sample_time = true,
3091 .mmap_pages = UINT_MAX,
3092 .user_freq = UINT_MAX,
3093 .user_interval = ULLONG_MAX,
3097 .default_per_cpu = true,
3099 .mmap_flush = MMAP_FLUSH_DEFAULT,
3100 .nr_threads_synthesize = 1,
3103 .synth = PERF_SYNTH_ALL,
3106 .sample = process_sample_event,
3107 .fork = perf_event__process_fork,
3108 .exit = perf_event__process_exit,
3109 .comm = perf_event__process_comm,
3110 .namespaces = perf_event__process_namespaces,
3111 .mmap = build_id__process_mmap,
3112 .mmap2 = build_id__process_mmap2,
3113 .itrace_start = process_timestamp_boundary,
3114 .aux = process_timestamp_boundary,
3115 .ordered_events = true,
3119 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3120 "\n\t\t\t\tDefault: fp";
3122 static bool dry_run;
3125 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3126 * with it and switch to using the library functions in perf_evlist that came
3127 * from builtin-record.c, i.e. use record_opts,
3128 * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'.
3131 static struct option __record_options[] = {
3132 OPT_CALLBACK('e', "event", &record.evlist, "event",
3133 "event selector. use 'perf list' to list available events",
3134 parse_events_option),
3135 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3136 "event filter", parse_filter),
3137 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3138 NULL, "don't record events from perf itself",
3140 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3141 "record events on existing process id"),
3142 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3143 "record events on existing thread id"),
3144 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3145 "collect data with this RT SCHED_FIFO priority"),
3146 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3147 "collect data without buffering"),
3148 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3149 "collect raw sample records from all opened counters"),
3150 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3151 "system-wide collection from all CPUs"),
3152 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3153 "list of cpus to monitor"),
3154 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3155 OPT_STRING('o', "output", &record.data.path, "file",
3156 "output file name"),
3157 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3158 &record.opts.no_inherit_set,
3159 "child tasks do not inherit counters"),
3160 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3161 "synthesize non-sample events at the end of output"),
3162 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3163 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3164 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3165 "Fail if the specified frequency can't be used"),
3166 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3167 "profile at this frequency",
3168 record__parse_freq),
3169 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3170 "number of mmap data pages and AUX area tracing mmap pages",
3171 record__parse_mmap_pages),
3172 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3173 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3174 record__mmap_flush_parse),
3175 OPT_BOOLEAN(0, "group", &record.opts.group,
3176 "put the counters into a counter group"),
3177 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3178 NULL, "enables call-graph recording" ,
3179 &record_callchain_opt),
3180 OPT_CALLBACK(0, "call-graph", &record.opts,
3181 "record_mode[,record_size]", record_callchain_help,
3182 &record_parse_callchain_opt),
3183 OPT_INCR('v', "verbose", &verbose,
3184 "be more verbose (show counter open errors, etc)"),
3185 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3186 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3187 "per thread counts"),
3188 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3189 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3190 "Record the sample physical addresses"),
3191 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3192 "Record the sampled data address data page size"),
3193 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3194 "Record the sampled code address (ip) page size"),
3195 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3196 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3197 &record.opts.sample_time_set,
3198 "Record the sample timestamps"),
3199 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3200 "Record the sample period"),
3201 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3202 "don't sample"),
3203 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3204 &record.no_buildid_cache_set,
3205 "do not update the buildid cache"),
3206 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3207 &record.no_buildid_set,
3208 "do not collect buildids in perf.data"),
3209 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3210 "monitor event in cgroup name only",
3212 OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3213 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3214 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3215 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3216 "user to profile"),
3218 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3219 "branch any", "sample any taken branches",
3220 parse_branch_stack),
3222 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3223 "branch filter mask", "branch stack filter modes",
3224 parse_branch_stack),
3225 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3226 "sample by weight (on special events only)"),
3227 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3228 "sample transaction flags (special events only)"),
3229 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3230 "use per-thread mmaps"),
3231 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3232 "sample selected machine registers on interrupt,"
3233 " use '-I?' to list register names", parse_intr_regs),
3234 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3235 "sample selected machine registers on interrupt,"
3236 " use '--user-regs=?' to list register names", parse_user_regs),
3237 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3238 "Record running/enabled time of read (:S) events"),
3239 OPT_CALLBACK('k', "clockid", &record.opts,
3240 "clockid", "clockid to use for events, see clock_gettime()",
3242 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3243 "opts", "AUX area tracing Snapshot Mode", ""),
3244 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3245 "opts", "sample AUX area", ""),
3246 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3247 "per thread proc mmap processing timeout in ms"),
3248 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3249 "Record namespaces events"),
3250 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3251 "Record cgroup events"),
3252 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3253 &record.opts.record_switch_events_set,
3254 "Record context switch events"),
3255 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3256 "Configure all used events to run in kernel space.",
3257 PARSE_OPT_EXCLUSIVE),
3258 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3259 "Configure all used events to run in user space.",
3260 PARSE_OPT_EXCLUSIVE),
3261 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3262 "collect kernel callchains"),
3263 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3264 "collect user callchains"),
3265 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3266 "clang binary to use for compiling BPF scriptlets"),
3267 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3268 "options passed to clang when compiling BPF scriptlets"),
3269 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3270 "file", "vmlinux pathname"),
3271 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3272 "Record build-id of all DSOs regardless of hits"),
3273 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3274 "Record build-id in map events"),
3275 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3276 "append timestamp to output filename"),
3277 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3278 "Record timestamp boundary (time of first/last samples)"),
3279 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3280 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3281 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3283 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3284 "switch output event selector. use 'perf list' to list available events",
3285 parse_events_option_new_evlist),
3286 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3287 "Limit number of switch output generated files"),
3288 OPT_BOOLEAN(0, "dry-run", &dry_run,
3289 "Parse options then exit"),
3290 #ifdef HAVE_AIO_SUPPORT
3291 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3292 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3295 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3296 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3297 record__parse_affinity),
3298 #ifdef HAVE_ZSTD_SUPPORT
3299 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3300 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3301 record__parse_comp_level),
3303 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3304 "size", "Limit the maximum size of the output file", parse_output_max_size),
3305 OPT_UINTEGER(0, "num-thread-synthesize",
3306 &record.opts.nr_threads_synthesize,
3307 "number of threads to run for event synthesis"),
3309 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3310 "libpfm4 event selector. use 'perf list' to list available events",
3311 parse_libpfm_events_option),
3313 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3314 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3315 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3316 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3317 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3318 parse_control_option),
3319 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3320 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3321 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3322 &record.debuginfod.set, "debuginfod urls",
3323 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3325 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3326 "write collected trace data into several data files using parallel threads",
3327 record__parse_threads),
3328 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3332 struct option *record_options = __record_options;
3334 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3336 struct perf_cpu cpu;
3339 if (cpu_map__is_dummy(cpus))
3342 perf_cpu_map__for_each_cpu(cpu, idx, cpus)
3343 set_bit(cpu.cpu, mask->bits);
3346 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3348 struct perf_cpu_map *cpus;
3350 cpus = perf_cpu_map__new(mask_spec);
3354 bitmap_zero(mask->bits, mask->nbits);
3355 record__mmap_cpu_mask_init(mask, cpus);
3356 perf_cpu_map__put(cpus);
3361 static void record__free_thread_masks(struct record *rec, int nr_threads)
3365 if (rec->thread_masks)
3366 for (t = 0; t < nr_threads; t++)
3367 record__thread_mask_free(&rec->thread_masks[t]);
3369 zfree(&rec->thread_masks);
3372 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3376 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3377 if (!rec->thread_masks) {
3378 pr_err("Failed to allocate thread masks\n");
3382 for (t = 0; t < nr_threads; t++) {
3383 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3385 pr_err("Failed to allocate thread masks[%d]\n", t);
3393 record__free_thread_masks(rec, nr_threads);
3398 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3400 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3402 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3406 rec->nr_threads = nr_cpus;
3407 pr_debug("nr_threads: %d\n", rec->nr_threads);
3409 for (t = 0; t < rec->nr_threads; t++) {
3410 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3411 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3413 pr_debug("thread_masks[%d]: ", t);
3414 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3415 pr_debug("thread_masks[%d]: ", t);
3416 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3423 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3424 const char **maps_spec, const char **affinity_spec,
3429 struct mmap_cpu_mask cpus_mask;
3430 struct thread_mask thread_mask, full_mask, *thread_masks;
3432 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3434 pr_err("Failed to allocate CPUs mask\n");
3437 record__mmap_cpu_mask_init(&cpus_mask, cpus);
3439 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3441 pr_err("Failed to allocate full mask\n");
3442 goto out_free_cpu_mask;
3445 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3447 pr_err("Failed to allocate thread mask\n");
3448 goto out_free_full_and_cpu_masks;
3451 for (s = 0; s < nr_spec; s++) {
3452 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3454 pr_err("Failed to initialize maps thread mask\n");
3457 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3459 pr_err("Failed to initialize affinity thread mask\n");
3463 /* ignore invalid CPUs but do not allow empty masks */
3464 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3465 cpus_mask.bits, thread_mask.maps.nbits)) {
3466 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3470 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3471 cpus_mask.bits, thread_mask.affinity.nbits)) {
3472 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3477 /* do not allow intersection with other masks (full_mask) */
3478 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3479 thread_mask.maps.nbits)) {
3480 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3484 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3485 thread_mask.affinity.nbits)) {
3486 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3491 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3492 thread_mask.maps.bits, full_mask.maps.nbits);
3493 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3494 thread_mask.affinity.bits, full_mask.maps.nbits);
3496 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3497 if (!thread_masks) {
3498 pr_err("Failed to reallocate thread masks\n");
3502 rec->thread_masks = thread_masks;
3503 rec->thread_masks[t] = thread_mask;
3505 pr_debug("thread_masks[%d]: ", t);
3506 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3507 pr_debug("thread_masks[%d]: ", t);
3508 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3511 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3513 pr_err("Failed to allocate thread mask\n");
3514 goto out_free_full_and_cpu_masks;
3517 rec->nr_threads = t;
3518 pr_debug("nr_threads: %d\n", rec->nr_threads);
3519 if (!rec->nr_threads)
3523 record__thread_mask_free(&thread_mask);
3524 out_free_full_and_cpu_masks:
3525 record__thread_mask_free(&full_mask);
3527 record__mmap_cpu_mask_free(&cpus_mask);
3532 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3535 struct cpu_topology *topo;
3537 topo = cpu_topology__new();
3539 pr_err("Failed to allocate CPU topology\n");
3543 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3544 topo->core_cpus_list, topo->core_cpus_lists);
3545 cpu_topology__delete(topo);
3550 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3553 struct cpu_topology *topo;
3555 topo = cpu_topology__new();
3557 pr_err("Failed to allocate CPU topology\n");
3561 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3562 topo->package_cpus_list, topo->package_cpus_lists);
3563 cpu_topology__delete(topo);
3568 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3573 struct numa_topology *topo;
3575 topo = numa_topology__new();
3577 pr_err("Failed to allocate NUMA topology\n");
3581 spec = zalloc(topo->nr * sizeof(char *));
3583 pr_err("Failed to allocate NUMA spec\n");
3585 goto out_delete_topo;
3587 for (s = 0; s < topo->nr; s++)
3588 spec[s] = topo->nodes[s].cpus;
3590 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3595 numa_topology__delete(topo);
3600 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3604 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3605 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3607 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3608 spec = strtok_r(user_spec, ":", &spec_ptr);
3611 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3612 mask = strtok_r(spec, "/", &mask_ptr);
3615 pr_debug2(" maps mask: %s\n", mask);
3616 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3618 pr_err("Failed to reallocate maps spec\n");
3622 maps_spec = tmp_spec;
3623 maps_spec[nr_spec] = dup_mask = strdup(mask);
3624 if (!maps_spec[nr_spec]) {
3625 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3629 mask = strtok_r(NULL, "/", &mask_ptr);
3631 pr_err("Invalid thread maps or affinity specs\n");
3635 pr_debug2(" affinity mask: %s\n", mask);
3636 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3638 pr_err("Failed to reallocate affinity spec\n");
3642 affinity_spec = tmp_spec;
3643 affinity_spec[nr_spec] = strdup(mask);
3644 if (!affinity_spec[nr_spec]) {
3645 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3653 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3654 (const char **)affinity_spec, nr_spec);
3658 for (s = 0; s < nr_spec; s++) {
3662 free(affinity_spec[s]);
3664 free(affinity_spec);
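/*
 * Example user spec: "--threads=0-3/0:4-7/4" yields two threads, the
 * first reading the mmaps of CPUs 0-3 with affinity pinned to CPU 0, the
 * second reading CPUs 4-7 pinned to CPU 4 (specs are separated by ':',
 * maps and affinity masks within a spec by '/').
 */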
3670 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3674 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3678 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3680 rec->nr_threads = 1;
3685 static int record__init_thread_masks(struct record *rec)
3688 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3690 if (!record__threads_enabled(rec))
3691 return record__init_thread_default_masks(rec, cpus);
3693 if (evlist__per_thread(rec->evlist)) {
3694 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3698 switch (rec->opts.threads_spec) {
3699 case THREAD_SPEC__CPU:
3700 ret = record__init_thread_cpu_masks(rec, cpus);
3702 case THREAD_SPEC__CORE:
3703 ret = record__init_thread_core_masks(rec, cpus);
3705 case THREAD_SPEC__PACKAGE:
3706 ret = record__init_thread_package_masks(rec, cpus);
3708 case THREAD_SPEC__NUMA:
3709 ret = record__init_thread_numa_masks(rec, cpus);
3711 case THREAD_SPEC__USER:
3712 ret = record__init_thread_user_masks(rec, cpus);
3721 int cmd_record(int argc, const char **argv)
3724 struct record *rec = &record;
3725 char errbuf[BUFSIZ];
3727 setlocale(LC_ALL, "");
3729 #ifndef HAVE_LIBBPF_SUPPORT
3730 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3731 set_nobuild('\0', "clang-path", true);
3732 set_nobuild('\0', "clang-opt", true);
3736 #ifndef HAVE_BPF_PROLOGUE
3737 # if !defined (HAVE_DWARF_SUPPORT)
3738 # define REASON "NO_DWARF=1"
3739 # elif !defined (HAVE_LIBBPF_SUPPORT)
3740 # define REASON "NO_LIBBPF=1"
3742 # define REASON "this architecture doesn't support BPF prologue"
3744 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3745 set_nobuild('\0', "vmlinux", true);
3750 #ifndef HAVE_BPF_SKEL
3751 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3752 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3756 rec->opts.affinity = PERF_AFFINITY_SYS;
3758 rec->evlist = evlist__new();
3759 if (rec->evlist == NULL)
3762 err = perf_config(perf_record_config, rec);
3766 argc = parse_options(argc, argv, record_options, record_usage,
3767 PARSE_OPT_STOP_AT_NON_OPTION);
3769 perf_quiet_option();
3771 err = symbol__validate_sym_arguments();
3775 perf_debuginfod_setup(&record.debuginfod);
3777 /* Make system wide (-a) the default target. */
3778 if (!argc && target__none(&rec->opts.target))
3779 rec->opts.target.system_wide = true;
3781 if (nr_cgroups && !rec->opts.target.system_wide) {
3782 usage_with_options_msg(record_usage, record_options,
3783 "cgroup monitoring only available in system-wide mode");
3787 if (rec->buildid_mmap) {
3788 if (!perf_can_record_build_id()) {
3789 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3793 pr_debug("Enabling build id in mmap2 events.\n");
3794 /* Enable mmap build id synthesizing. */
3795 symbol_conf.buildid_mmap2 = true;
3796 /* Enable perf_event_attr::build_id bit. */
3797 rec->opts.build_id = true;
3798 /* Disable build id cache. */
3799 rec->no_buildid = true;
3802 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3803 pr_err("Kernel has no cgroup sampling support.\n");
3808 if (rec->opts.kcore || record__threads_enabled(rec))
3809 rec->data.is_dir = true;
3811 if (record__threads_enabled(rec)) {
3812 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3813 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
3816 if (record__aio_enabled(rec)) {
3817 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
3822 if (rec->opts.comp_level != 0) {
3823 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3824 rec->no_buildid = true;
3827 if (rec->opts.record_switch_events &&
3828 !perf_can_record_switch_events()) {
3829 ui__error("kernel does not support recording context switch events\n");
3830 parse_options_usage(record_usage, record_options, "switch-events", 0);
3835 if (switch_output_setup(rec)) {
3836 parse_options_usage(record_usage, record_options, "switch-output", 0);
3841 if (rec->switch_output.time) {
3842 signal(SIGALRM, alarm_sig_handler);
3843 alarm(rec->switch_output.time);
3846 if (rec->switch_output.num_files) {
3847 rec->switch_output.filenames = calloc(sizeof(char *),
3848 rec->switch_output.num_files);
3849 if (!rec->switch_output.filenames) {
3855 if (rec->timestamp_filename && record__threads_enabled(rec)) {
3856 rec->timestamp_filename = false;
3857 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3861 * Allow aliases to facilitate the lookup of symbols for address
3862 * filters. Refer to auxtrace_parse_filters().
3864 symbol_conf.allow_aliases = true;
3868 err = record__auxtrace_init(rec);
3875 err = bpf__setup_stdout(rec->evlist);
3877 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3878 pr_err("ERROR: Setup BPF stdout failed: %s\n",
3885 if (rec->no_buildid_cache || rec->no_buildid) {
3886 disable_buildid_cache();
3887 } else if (rec->switch_output.enabled) {
3889 * In 'perf record --switch-output', disable buildid
3890 * generation by default to reduce data file switching
3891 * overhead. Still generate buildids if they are required explicitly, using:
3894 * perf record --switch-output --no-no-buildid \
3895 * --no-no-buildid-cache
3897 * The following code is equivalent to:
3899 * if ((rec->no_buildid || !rec->no_buildid_set) &&
3900 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3901 * disable_buildid_cache();
3903 bool disable = true;
3905 if (rec->no_buildid_set && !rec->no_buildid)
3907 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3910 rec->no_buildid = true;
3911 rec->no_buildid_cache = true;
3912 disable_buildid_cache();
3916 if (record.opts.overwrite)
3917 record.opts.tail_synthesize = true;
3919 if (rec->evlist->core.nr_entries == 0) {
3920 if (perf_pmu__has_hybrid()) {
3921 err = evlist__add_default_hybrid(rec->evlist,
3922 !record.opts.no_samples);
3924 err = __evlist__add_default(rec->evlist,
3925 !record.opts.no_samples);
3929 pr_err("Not enough memory for event selector list\n");
3934 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3935 rec->opts.no_inherit = true;
3937 err = target__validate(&rec->opts.target);
3939 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3940 ui__warning("%s\n", errbuf);
3943 err = target__parse_uid(&rec->opts.target);
3945 int saved_errno = errno;
3947 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3948 ui__error("%s", errbuf);
3954 /* Enable ignoring missing threads when -u/-p option is defined. */
3955 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3957 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3958 pr_err("failed to use cpu list %s\n",
3959 rec->opts.target.cpu_list);
3963 rec->opts.target.hybrid = perf_pmu__has_hybrid();
3965 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3966 arch__add_leaf_frame_record_opts(&rec->opts);
3969 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3970 usage_with_options(record_usage, record_options);
3972 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3977 * We take all buildids when the file contains
3978 * AUX area tracing data, because we do not decode the
3979 * trace: that would take too long.
3981 if (rec->opts.full_auxtrace)
3982 rec->buildid_all = true;
3984 if (rec->opts.text_poke) {
3985 err = record__config_text_poke(rec->evlist);
3987 pr_err("record__config_text_poke failed, error %d\n", err);
3993 err = record__config_off_cpu(rec);
3995 pr_err("record__config_off_cpu failed, error %d\n", err);
4000 if (record_opts__config(&rec->opts)) {
4005 err = record__init_thread_masks(rec);
4007 pr_err("Failed to initialize parallel data streaming masks\n");
4011 if (rec->opts.nr_cblocks > nr_cblocks_max)
4012 rec->opts.nr_cblocks = nr_cblocks_max;
4013 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4015 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4016 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4018 if (rec->opts.comp_level > comp_level_max)
4019 rec->opts.comp_level = comp_level_max;
4020 pr_debug("comp level: %d\n", rec->opts.comp_level);
4022 err = __cmd_record(&record, argc, argv);
4024 evlist__delete(rec->evlist);
4026 auxtrace_record__free(rec->itr);
4028 record__free_thread_masks(rec, rec->nr_threads);
4029 rec->nr_threads = 0;
4030 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4034 static void snapshot_sig_handler(int sig __maybe_unused)
4036 struct record *rec = &record;
4038 hit_auxtrace_snapshot_trigger(rec);
4040 if (switch_output_signal(rec))
4041 trigger_hit(&switch_output_trigger);
4044 static void alarm_sig_handler(int sig __maybe_unused)
4046 struct record *rec = &record;
4048 if (switch_output_time(rec))
4049 trigger_hit(&switch_output_trigger);