GNU Linux-libre 5.19-rc6-gnu: tools/perf/builtin-record.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "util/pmu-hybrid.h"
51 #include "util/evlist-hybrid.h"
52 #include "util/off_cpu.h"
53 #include "asm/bug.h"
54 #include "perf.h"
55 #include "cputopo.h"
56
57 #include <errno.h>
58 #include <inttypes.h>
59 #include <locale.h>
60 #include <poll.h>
61 #include <pthread.h>
62 #include <unistd.h>
63 #ifndef HAVE_GETTID
64 #include <syscall.h>
65 #endif
66 #include <sched.h>
67 #include <signal.h>
68 #ifdef HAVE_EVENTFD_SUPPORT
69 #include <sys/eventfd.h>
70 #endif
71 #include <sys/mman.h>
72 #include <sys/wait.h>
73 #include <sys/types.h>
74 #include <sys/stat.h>
75 #include <fcntl.h>
76 #include <linux/err.h>
77 #include <linux/string.h>
78 #include <linux/time64.h>
79 #include <linux/zalloc.h>
80 #include <linux/bitmap.h>
81 #include <sys/time.h>
82
83 struct switch_output {
84         bool             enabled;
85         bool             signal;
86         unsigned long    size;
87         unsigned long    time;
88         const char      *str;
89         bool             set;
90         char             **filenames;
91         int              num_files;
92         int              cur_file;
93 };
94
95 struct thread_mask {
96         struct mmap_cpu_mask    maps;
97         struct mmap_cpu_mask    affinity;
98 };
99
100 struct record_thread {
101         pid_t                   tid;
102         struct thread_mask      *mask;
103         struct {
104                 int             msg[2];
105                 int             ack[2];
106         } pipes;
107         struct fdarray          pollfd;
108         int                     ctlfd_pos;
109         int                     nr_mmaps;
110         struct mmap             **maps;
111         struct mmap             **overwrite_maps;
112         struct record           *rec;
113         unsigned long long      samples;
114         unsigned long           waking;
115         u64                     bytes_written;
116         u64                     bytes_transferred;
117         u64                     bytes_compressed;
118 };
119
120 static __thread struct record_thread *thread;
121
122 enum thread_msg {
123         THREAD_MSG__UNDEFINED = 0,
124         THREAD_MSG__READY,
125         THREAD_MSG__MAX,
126 };
127
128 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
129         "UNDEFINED", "READY"
130 };
131
132 enum thread_spec {
133         THREAD_SPEC__UNDEFINED = 0,
134         THREAD_SPEC__CPU,
135         THREAD_SPEC__CORE,
136         THREAD_SPEC__PACKAGE,
137         THREAD_SPEC__NUMA,
138         THREAD_SPEC__USER,
139         THREAD_SPEC__MAX,
140 };
141
142 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
143         "undefined", "cpu", "core", "package", "numa", "user"
144 };
145
146 struct record {
147         struct perf_tool        tool;
148         struct record_opts      opts;
149         u64                     bytes_written;
150         struct perf_data        data;
151         struct auxtrace_record  *itr;
152         struct evlist   *evlist;
153         struct perf_session     *session;
154         struct evlist           *sb_evlist;
155         pthread_t               thread_id;
156         int                     realtime_prio;
157         bool                    switch_output_event_set;
158         bool                    no_buildid;
159         bool                    no_buildid_set;
160         bool                    no_buildid_cache;
161         bool                    no_buildid_cache_set;
162         bool                    buildid_all;
163         bool                    buildid_mmap;
164         bool                    timestamp_filename;
165         bool                    timestamp_boundary;
166         bool                    off_cpu;
167         struct switch_output    switch_output;
168         unsigned long long      samples;
169         unsigned long           output_max_size;        /* = 0: unlimited */
170         struct perf_debuginfod  debuginfod;
171         int                     nr_threads;
172         struct thread_mask      *thread_masks;
173         struct record_thread    *thread_data;
174 };
175
176 static volatile int done;
177
178 static volatile int auxtrace_record__snapshot_started;
179 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
180 static DEFINE_TRIGGER(switch_output_trigger);
181
182 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
183         "SYS", "NODE", "CPU"
184 };
185
186 #ifndef HAVE_GETTID
187 static inline pid_t gettid(void)
188 {
189         return (pid_t)syscall(__NR_gettid);
190 }
191 #endif
192
193 static int record__threads_enabled(struct record *rec)
194 {
195         return rec->opts.threads_spec;
196 }
197
198 static bool switch_output_signal(struct record *rec)
199 {
200         return rec->switch_output.signal &&
201                trigger_is_ready(&switch_output_trigger);
202 }
203
204 static bool switch_output_size(struct record *rec)
205 {
206         return rec->switch_output.size &&
207                trigger_is_ready(&switch_output_trigger) &&
208                (rec->bytes_written >= rec->switch_output.size);
209 }
210
211 static bool switch_output_time(struct record *rec)
212 {
213         return rec->switch_output.time &&
214                trigger_is_ready(&switch_output_trigger);
215 }
216
217 static u64 record__bytes_written(struct record *rec)
218 {
219         int t;
220         u64 bytes_written = rec->bytes_written;
221         struct record_thread *thread_data = rec->thread_data;
222
223         for (t = 0; t < rec->nr_threads; t++)
224                 bytes_written += thread_data[t].bytes_written;
225
226         return bytes_written;
227 }
228
229 static bool record__output_max_size_exceeded(struct record *rec)
230 {
231         return rec->output_max_size &&
232                (record__bytes_written(rec) >= rec->output_max_size);
233 }
234
235 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
236                          void *bf, size_t size)
237 {
238         struct perf_data_file *file = &rec->session->data->file;
239
240         if (map && map->file)
241                 file = map->file;
242
243         if (perf_data_file__write(file, bf, size) < 0) {
244                 pr_err("failed to write perf data, error: %m\n");
245                 return -1;
246         }
247
248         if (map && map->file)
249                 thread->bytes_written += size;
250         else
251                 rec->bytes_written += size;
252
253         if (record__output_max_size_exceeded(rec) && !done) {
254                 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
255                                 " stopping session ]\n",
256                                 record__bytes_written(rec) >> 10);
257                 done = 1;
258         }
259
260         if (switch_output_size(rec))
261                 trigger_hit(&switch_output_trigger);
262
263         return 0;
264 }
265
266 static int record__aio_enabled(struct record *rec);
267 static int record__comp_enabled(struct record *rec);
268 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
269                             void *dst, size_t dst_size, void *src, size_t src_size);
270
271 #ifdef HAVE_AIO_SUPPORT
272 static int record__aio_write(struct aiocb *cblock, int trace_fd,
273                 void *buf, size_t size, off_t off)
274 {
275         int rc;
276
277         cblock->aio_fildes = trace_fd;
278         cblock->aio_buf    = buf;
279         cblock->aio_nbytes = size;
280         cblock->aio_offset = off;
281         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
282
283         do {
284                 rc = aio_write(cblock);
285                 if (rc == 0) {
286                         break;
287                 } else if (errno != EAGAIN) {
288                         cblock->aio_fildes = -1;
289                         pr_err("failed to queue perf data, error: %m\n");
290                         break;
291                 }
292         } while (1);
293
294         return rc;
295 }
296
297 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
298 {
299         void *rem_buf;
300         off_t rem_off;
301         size_t rem_size;
302         int rc, aio_errno;
303         ssize_t aio_ret, written;
304
305         aio_errno = aio_error(cblock);
306         if (aio_errno == EINPROGRESS)
307                 return 0;
308
309         written = aio_ret = aio_return(cblock);
310         if (aio_ret < 0) {
311                 if (aio_errno != EINTR)
312                         pr_err("failed to write perf data, error: %m\n");
313                 written = 0;
314         }
315
316         rem_size = cblock->aio_nbytes - written;
317
318         if (rem_size == 0) {
319                 cblock->aio_fildes = -1;
320                 /*
321                  * md->refcount is incremented in record__aio_pushfn() for
322                  * every aio write request started in record__aio_push() so
323                  * decrement it because the request is now complete.
324                  */
325                 perf_mmap__put(&md->core);
326                 rc = 1;
327         } else {
328                 /*
329                  * The aio write request may require a restart with the
330                  * remainder if the kernel didn't write the whole
331                  * chunk at once.
332                  */
333                 rem_off = cblock->aio_offset + written;
334                 rem_buf = (void *)(cblock->aio_buf + written);
335                 record__aio_write(cblock, cblock->aio_fildes,
336                                 rem_buf, rem_size, rem_off);
337                 rc = 0;
338         }
339
340         return rc;
341 }
342
343 static int record__aio_sync(struct mmap *md, bool sync_all)
344 {
345         struct aiocb **aiocb = md->aio.aiocb;
346         struct aiocb *cblocks = md->aio.cblocks;
347         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
348         int i, do_suspend;
349
350         do {
351                 do_suspend = 0;
352                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
353                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
354                                 if (sync_all)
355                                         aiocb[i] = NULL;
356                                 else
357                                         return i;
358                         } else {
359                                 /*
360                                  * The started aio write is not complete yet,
361                                  * so it has to be waited on before the
362                                  * next allocation.
363                                  */
364                                 aiocb[i] = &cblocks[i];
365                                 do_suspend = 1;
366                         }
367                 }
368                 if (!do_suspend)
369                         return -1;
370
371                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
372                         if (!(errno == EAGAIN || errno == EINTR))
373                                 pr_err("failed to sync perf data, error: %m\n");
374                 }
375         } while (1);
376 }
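
/*
 * Illustrative sketch (not from the perf sources): the bare POSIX AIO
 * request lifecycle that record__aio_write(), record__aio_complete() and
 * record__aio_sync() above are built around, without the perf mmap
 * refcounting.  Kept under #if 0; the helper name is invented and error
 * handling is trimmed.
 */
#if 0
#include <aio.h>
#include <errno.h>
#include <signal.h>
#include <string.h>

static ssize_t sketch_aio_write_and_wait(int fd, void *buf, size_t len, off_t off)
{
        struct aiocb cb;
        const struct aiocb *list[1] = { &cb };

        memset(&cb, 0, sizeof(cb));
        cb.aio_fildes = fd;
        cb.aio_buf    = buf;
        cb.aio_nbytes = len;
        cb.aio_offset = off;
        cb.aio_sigevent.sigev_notify = SIGEV_NONE;

        if (aio_write(&cb))                     /* queue the request */
                return -1;

        while (aio_error(&cb) == EINPROGRESS)   /* block until it completes */
                aio_suspend(list, 1, NULL);

        return aio_return(&cb);                 /* bytes written, or -1 */
}
#endif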
377
378 struct record_aio {
379         struct record   *rec;
380         void            *data;
381         size_t          size;
382 };
383
384 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
385 {
386         struct record_aio *aio = to;
387
388         /*
389          * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
390          * buffer to release space in the kernel buffer as fast as possible, by calling
391          * perf_mmap__consume() from the perf_mmap__push() function.
392          *
393          * That lets the kernel proceed with storing more profiling data into
394          * the kernel buffer earlier than the other per-cpu kernel buffers are handled.
395          *
396          * Copying can be done in two steps in case the chunk of profiling data
397          * crosses the upper bound of the kernel buffer. In that case we first move the
398          * part of the data from map->start to the upper bound and then the remainder
399          * from the beginning of the kernel buffer to the end of the data chunk.
400          */
401
402         if (record__comp_enabled(aio->rec)) {
403                 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
404                                      mmap__mmap_len(map) - aio->size,
405                                      buf, size);
406         } else {
407                 memcpy(aio->data + aio->size, buf, size);
408         }
409
410         if (!aio->size) {
411                 /*
412                  * Increment map->refcount to guard map->aio.data[] buffer
413                  * from premature deallocation, because the map object can be
414                  * released before the aio write request started on the
415                  * map->aio.data[] buffer completes.
416                  *
417                  * perf_mmap__put() is done at record__aio_complete()
418                  * after started aio request completion or at record__aio_push()
419                  * if the request failed to start.
420                  */
421                 perf_mmap__get(&map->core);
422         }
423
424         aio->size += size;
425
426         return size;
427 }
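
/*
 * Illustrative sketch (not from the perf sources): the two-step copy
 * described in the record__aio_pushfn() comment above, for a data chunk
 * that wraps past the end of a ring buffer.  Plain memcpy() only; the
 * function and parameter names are invented for the example.
 */
#if 0
#include <string.h>

static void sketch_ring_copy(void *dst, const void *ring, size_t ring_size,
                             size_t start, size_t len)
{
        size_t to_end = ring_size - start;

        if (len <= to_end) {
                /* the chunk is contiguous */
                memcpy(dst, (const char *)ring + start, len);
        } else {
                /* first part: from 'start' up to the upper bound of the buffer */
                memcpy(dst, (const char *)ring + start, to_end);
                /* remainder: from the beginning of the buffer */
                memcpy((char *)dst + to_end, ring, len - to_end);
        }
}
#endif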
428
429 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
430 {
431         int ret, idx;
432         int trace_fd = rec->session->data->file.fd;
433         struct record_aio aio = { .rec = rec, .size = 0 };
434
435         /*
436          * Call record__aio_sync() to wait till map->aio.data[] buffer
437          * becomes available after previous aio write operation.
438          */
439
440         idx = record__aio_sync(map, false);
441         aio.data = map->aio.data[idx];
442         ret = perf_mmap__push(map, &aio, record__aio_pushfn);
443         if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
444                 return ret;
445
446         rec->samples++;
447         ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
448         if (!ret) {
449                 *off += aio.size;
450                 rec->bytes_written += aio.size;
451                 if (switch_output_size(rec))
452                         trigger_hit(&switch_output_trigger);
453         } else {
454                 /*
455                  * Drop the map->refcount taken in record__aio_pushfn() if the
456                  * record__aio_write() operation failed to start; otherwise
457                  * map->refcount is decremented in record__aio_complete() after
458                  * the aio write operation finishes successfully.
459                  */
460                 perf_mmap__put(&map->core);
461         }
462
463         return ret;
464 }
465
466 static off_t record__aio_get_pos(int trace_fd)
467 {
468         return lseek(trace_fd, 0, SEEK_CUR);
469 }
470
471 static void record__aio_set_pos(int trace_fd, off_t pos)
472 {
473         lseek(trace_fd, pos, SEEK_SET);
474 }
475
476 static void record__aio_mmap_read_sync(struct record *rec)
477 {
478         int i;
479         struct evlist *evlist = rec->evlist;
480         struct mmap *maps = evlist->mmap;
481
482         if (!record__aio_enabled(rec))
483                 return;
484
485         for (i = 0; i < evlist->core.nr_mmaps; i++) {
486                 struct mmap *map = &maps[i];
487
488                 if (map->core.base)
489                         record__aio_sync(map, true);
490         }
491 }
492
493 static int nr_cblocks_default = 1;
494 static int nr_cblocks_max = 4;
495
496 static int record__aio_parse(const struct option *opt,
497                              const char *str,
498                              int unset)
499 {
500         struct record_opts *opts = (struct record_opts *)opt->value;
501
502         if (unset) {
503                 opts->nr_cblocks = 0;
504         } else {
505                 if (str)
506                         opts->nr_cblocks = strtol(str, NULL, 0);
507                 if (!opts->nr_cblocks)
508                         opts->nr_cblocks = nr_cblocks_default;
509         }
510
511         return 0;
512 }
513 #else /* HAVE_AIO_SUPPORT */
514 static int nr_cblocks_max = 0;
515
516 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
517                             off_t *off __maybe_unused)
518 {
519         return -1;
520 }
521
522 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
523 {
524         return -1;
525 }
526
527 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
528 {
529 }
530
531 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
532 {
533 }
534 #endif
535
536 static int record__aio_enabled(struct record *rec)
537 {
538         return rec->opts.nr_cblocks > 0;
539 }
540
541 #define MMAP_FLUSH_DEFAULT 1
542 static int record__mmap_flush_parse(const struct option *opt,
543                                     const char *str,
544                                     int unset)
545 {
546         int flush_max;
547         struct record_opts *opts = (struct record_opts *)opt->value;
548         static struct parse_tag tags[] = {
549                         { .tag  = 'B', .mult = 1       },
550                         { .tag  = 'K', .mult = 1 << 10 },
551                         { .tag  = 'M', .mult = 1 << 20 },
552                         { .tag  = 'G', .mult = 1 << 30 },
553                         { .tag  = 0 },
554         };
555
556         if (unset)
557                 return 0;
558
559         if (str) {
560                 opts->mmap_flush = parse_tag_value(str, tags);
561                 if (opts->mmap_flush == (int)-1)
562                         opts->mmap_flush = strtol(str, NULL, 0);
563         }
564
565         if (!opts->mmap_flush)
566                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
567
568         flush_max = evlist__mmap_size(opts->mmap_pages);
569         flush_max /= 4;
570         if (opts->mmap_flush > flush_max)
571                 opts->mmap_flush = flush_max;
572
573         return 0;
574 }
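
/*
 * Illustrative sketch (not from the perf sources): a minimal size-suffix
 * parser in the spirit of the parse_tag_value() call above, handling only
 * the B/K/M/G multipliers listed in the tags[] table.  The helper name and
 * fallback behaviour are invented for the example.
 */
#if 0
#include <stdlib.h>

static long long sketch_parse_size(const char *str)
{
        char *end = NULL;
        long long val = strtoll(str, &end, 0);

        switch (end ? *end : '\0') {
        case 'B': return val;
        case 'K': return val << 10;
        case 'M': return val << 20;
        case 'G': return val << 30;
        default:  return val;           /* no recognized suffix */
        }
}
#endif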
575
576 #ifdef HAVE_ZSTD_SUPPORT
577 static unsigned int comp_level_default = 1;
578
579 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
580 {
581         struct record_opts *opts = opt->value;
582
583         if (unset) {
584                 opts->comp_level = 0;
585         } else {
586                 if (str)
587                         opts->comp_level = strtol(str, NULL, 0);
588                 if (!opts->comp_level)
589                         opts->comp_level = comp_level_default;
590         }
591
592         return 0;
593 }
594 #endif
595 static unsigned int comp_level_max = 22;
596
597 static int record__comp_enabled(struct record *rec)
598 {
599         return rec->opts.comp_level > 0;
600 }
601
602 static int process_synthesized_event(struct perf_tool *tool,
603                                      union perf_event *event,
604                                      struct perf_sample *sample __maybe_unused,
605                                      struct machine *machine __maybe_unused)
606 {
607         struct record *rec = container_of(tool, struct record, tool);
608         return record__write(rec, NULL, event, event->header.size);
609 }
610
611 static int process_locked_synthesized_event(struct perf_tool *tool,
612                                      union perf_event *event,
613                                      struct perf_sample *sample __maybe_unused,
614                                      struct machine *machine __maybe_unused)
615 {
616         static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
617         int ret;
618
619         pthread_mutex_lock(&synth_lock);
620         ret = process_synthesized_event(tool, event, sample, machine);
621         pthread_mutex_unlock(&synth_lock);
622         return ret;
623 }
624
625 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
626 {
627         struct record *rec = to;
628
629         if (record__comp_enabled(rec)) {
630                 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
631                 bf   = map->data;
632         }
633
634         thread->samples++;
635         return record__write(rec, map, bf, size);
636 }
637
638 static volatile int signr = -1;
639 static volatile int child_finished;
640 #ifdef HAVE_EVENTFD_SUPPORT
641 static int done_fd = -1;
642 #endif
643
644 static void sig_handler(int sig)
645 {
646         if (sig == SIGCHLD)
647                 child_finished = 1;
648         else
649                 signr = sig;
650
651         done = 1;
652 #ifdef HAVE_EVENTFD_SUPPORT
653 {
654         u64 tmp = 1;
655         /*
656          * It is possible for this signal handler to run after done is checked
657          * in the main loop, but before the perf counter fds are polled. If this
658          * happens, the poll() will continue to wait even though done is set,
659          * and will only break out if either another signal is received, or the
660          * counters are ready for read. To ensure the poll() doesn't sleep when
661          * done is set, use an eventfd (done_fd) to wake up the poll().
662          */
663         if (write(done_fd, &tmp, sizeof(tmp)) < 0)
664                 pr_err("failed to signal wakeup fd, error: %m\n");
665 }
666 #endif // HAVE_EVENTFD_SUPPORT
667 }
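
/*
 * Illustrative sketch (not from the perf sources): the eventfd wakeup used
 * in sig_handler() above, reduced to the bare pattern.  A signal handler
 * writes an 8-byte counter to the eventfd, which makes a poll() watching
 * that descriptor return instead of sleeping.  Error handling is trimmed.
 */
#if 0
#include <poll.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <unistd.h>

static void sketch_eventfd_wakeup(void)
{
        int efd = eventfd(0, EFD_NONBLOCK);
        struct pollfd pfd = { .fd = efd, .events = POLLIN };
        uint64_t val = 1;

        /* from a signal handler (or another thread): wake up the poller */
        write(efd, &val, sizeof(val));

        /* in the main loop: returns immediately because efd is readable */
        poll(&pfd, 1, -1);
        read(efd, &val, sizeof(val));   /* drain the counter */
        close(efd);
}
#endif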
668
669 static void sigsegv_handler(int sig)
670 {
671         perf_hooks__recover();
672         sighandler_dump_stack(sig);
673 }
674
675 static void record__sig_exit(void)
676 {
677         if (signr == -1)
678                 return;
679
680         signal(signr, SIG_DFL);
681         raise(signr);
682 }
683
684 #ifdef HAVE_AUXTRACE_SUPPORT
685
686 static int record__process_auxtrace(struct perf_tool *tool,
687                                     struct mmap *map,
688                                     union perf_event *event, void *data1,
689                                     size_t len1, void *data2, size_t len2)
690 {
691         struct record *rec = container_of(tool, struct record, tool);
692         struct perf_data *data = &rec->data;
693         size_t padding;
694         u8 pad[8] = {0};
695
696         if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
697                 off_t file_offset;
698                 int fd = perf_data__fd(data);
699                 int err;
700
701                 file_offset = lseek(fd, 0, SEEK_CUR);
702                 if (file_offset == -1)
703                         return -1;
704                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
705                                                      event, file_offset);
706                 if (err)
707                         return err;
708         }
709
710         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
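        /* e.g. len1 + len2 == 13: 13 & 7 == 5, so padding == 3 and the data is padded to 16 bytes */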
711         padding = (len1 + len2) & 7;
712         if (padding)
713                 padding = 8 - padding;
714
715         record__write(rec, map, event, event->header.size);
716         record__write(rec, map, data1, len1);
717         if (len2)
718                 record__write(rec, map, data2, len2);
719         record__write(rec, map, &pad, padding);
720
721         return 0;
722 }
723
724 static int record__auxtrace_mmap_read(struct record *rec,
725                                       struct mmap *map)
726 {
727         int ret;
728
729         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
730                                   record__process_auxtrace);
731         if (ret < 0)
732                 return ret;
733
734         if (ret)
735                 rec->samples++;
736
737         return 0;
738 }
739
740 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
741                                                struct mmap *map)
742 {
743         int ret;
744
745         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
746                                            record__process_auxtrace,
747                                            rec->opts.auxtrace_snapshot_size);
748         if (ret < 0)
749                 return ret;
750
751         if (ret)
752                 rec->samples++;
753
754         return 0;
755 }
756
757 static int record__auxtrace_read_snapshot_all(struct record *rec)
758 {
759         int i;
760         int rc = 0;
761
762         for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
763                 struct mmap *map = &rec->evlist->mmap[i];
764
765                 if (!map->auxtrace_mmap.base)
766                         continue;
767
768                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
769                         rc = -1;
770                         goto out;
771                 }
772         }
773 out:
774         return rc;
775 }
776
777 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
778 {
779         pr_debug("Recording AUX area tracing snapshot\n");
780         if (record__auxtrace_read_snapshot_all(rec) < 0) {
781                 trigger_error(&auxtrace_snapshot_trigger);
782         } else {
783                 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
784                         trigger_error(&auxtrace_snapshot_trigger);
785                 else
786                         trigger_ready(&auxtrace_snapshot_trigger);
787         }
788 }
789
790 static int record__auxtrace_snapshot_exit(struct record *rec)
791 {
792         if (trigger_is_error(&auxtrace_snapshot_trigger))
793                 return 0;
794
795         if (!auxtrace_record__snapshot_started &&
796             auxtrace_record__snapshot_start(rec->itr))
797                 return -1;
798
799         record__read_auxtrace_snapshot(rec, true);
800         if (trigger_is_error(&auxtrace_snapshot_trigger))
801                 return -1;
802
803         return 0;
804 }
805
806 static int record__auxtrace_init(struct record *rec)
807 {
808         int err;
809
810         if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
811             && record__threads_enabled(rec)) {
812                 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
813                 return -EINVAL;
814         }
815
816         if (!rec->itr) {
817                 rec->itr = auxtrace_record__init(rec->evlist, &err);
818                 if (err)
819                         return err;
820         }
821
822         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
823                                               rec->opts.auxtrace_snapshot_opts);
824         if (err)
825                 return err;
826
827         err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
828                                             rec->opts.auxtrace_sample_opts);
829         if (err)
830                 return err;
831
832         auxtrace_regroup_aux_output(rec->evlist);
833
834         return auxtrace_parse_filters(rec->evlist);
835 }
836
837 #else
838
839 static inline
840 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
841                                struct mmap *map __maybe_unused)
842 {
843         return 0;
844 }
845
846 static inline
847 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
848                                     bool on_exit __maybe_unused)
849 {
850 }
851
852 static inline
853 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
854 {
855         return 0;
856 }
857
858 static inline
859 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
860 {
861         return 0;
862 }
863
864 static int record__auxtrace_init(struct record *rec __maybe_unused)
865 {
866         return 0;
867 }
868
869 #endif
870
871 static int record__config_text_poke(struct evlist *evlist)
872 {
873         struct evsel *evsel;
874
875         /* Nothing to do if text poke is already configured */
876         evlist__for_each_entry(evlist, evsel) {
877                 if (evsel->core.attr.text_poke)
878                         return 0;
879         }
880
881         evsel = evlist__add_dummy_on_all_cpus(evlist);
882         if (!evsel)
883                 return -ENOMEM;
884
885         evsel->core.attr.text_poke = 1;
886         evsel->core.attr.ksymbol = 1;
887         evsel->immediate = true;
888         evsel__set_sample_bit(evsel, TIME);
889
890         return 0;
891 }
892
893 static int record__config_off_cpu(struct record *rec)
894 {
895         return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
896 }
897
898 static bool record__kcore_readable(struct machine *machine)
899 {
900         char kcore[PATH_MAX];
901         int fd;
902
903         scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
904
905         fd = open(kcore, O_RDONLY);
906         if (fd < 0)
907                 return false;
908
909         close(fd);
910
911         return true;
912 }
913
914 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
915 {
916         char from_dir[PATH_MAX];
917         char kcore_dir[PATH_MAX];
918         int ret;
919
920         snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
921
922         ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
923         if (ret)
924                 return ret;
925
926         return kcore_copy(from_dir, kcore_dir);
927 }
928
929 static void record__thread_data_init_pipes(struct record_thread *thread_data)
930 {
931         thread_data->pipes.msg[0] = -1;
932         thread_data->pipes.msg[1] = -1;
933         thread_data->pipes.ack[0] = -1;
934         thread_data->pipes.ack[1] = -1;
935 }
936
937 static int record__thread_data_open_pipes(struct record_thread *thread_data)
938 {
939         if (pipe(thread_data->pipes.msg))
940                 return -EINVAL;
941
942         if (pipe(thread_data->pipes.ack)) {
943                 close(thread_data->pipes.msg[0]);
944                 thread_data->pipes.msg[0] = -1;
945                 close(thread_data->pipes.msg[1]);
946                 thread_data->pipes.msg[1] = -1;
947                 return -EINVAL;
948         }
949
950         pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
951                  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
952                  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
953
954         return 0;
955 }
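
/*
 * Illustrative sketch (not from the perf sources): the message/acknowledge
 * handshake the msg/ack pipe pairs above are used for.  A worker writes one
 * enum value into the ack pipe and the controlling thread reads it back,
 * much like record__thread() does with THREAD_MSG__READY.  Names are
 * invented and error handling is trimmed.
 */
#if 0
#include <unistd.h>

enum sketch_msg { SKETCH_MSG_READY = 1 };

static int sketch_handshake(int ack_pipe[2])
{
        enum sketch_msg msg = SKETCH_MSG_READY;

        /* worker side: announce readiness */
        if (write(ack_pipe[1], &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
                return -1;

        /* controller side: block until the worker has announced itself */
        if (read(ack_pipe[0], &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
                return -1;

        return msg == SKETCH_MSG_READY ? 0 : -1;
}
#endif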
956
957 static void record__thread_data_close_pipes(struct record_thread *thread_data)
958 {
959         if (thread_data->pipes.msg[0] != -1) {
960                 close(thread_data->pipes.msg[0]);
961                 thread_data->pipes.msg[0] = -1;
962         }
963         if (thread_data->pipes.msg[1] != -1) {
964                 close(thread_data->pipes.msg[1]);
965                 thread_data->pipes.msg[1] = -1;
966         }
967         if (thread_data->pipes.ack[0] != -1) {
968                 close(thread_data->pipes.ack[0]);
969                 thread_data->pipes.ack[0] = -1;
970         }
971         if (thread_data->pipes.ack[1] != -1) {
972                 close(thread_data->pipes.ack[1]);
973                 thread_data->pipes.ack[1] = -1;
974         }
975 }
976
977 static bool evlist__per_thread(struct evlist *evlist)
978 {
979         return cpu_map__is_dummy(evlist->core.user_requested_cpus);
980 }
981
982 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
983 {
984         int m, tm, nr_mmaps = evlist->core.nr_mmaps;
985         struct mmap *mmap = evlist->mmap;
986         struct mmap *overwrite_mmap = evlist->overwrite_mmap;
987         struct perf_cpu_map *cpus = evlist->core.all_cpus;
988         bool per_thread = evlist__per_thread(evlist);
989
990         if (per_thread)
991                 thread_data->nr_mmaps = nr_mmaps;
992         else
993                 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
994                                                       thread_data->mask->maps.nbits);
995         if (mmap) {
996                 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
997                 if (!thread_data->maps)
998                         return -ENOMEM;
999         }
1000         if (overwrite_mmap) {
1001                 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1002                 if (!thread_data->overwrite_maps) {
1003                         zfree(&thread_data->maps);
1004                         return -ENOMEM;
1005                 }
1006         }
1007         pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1008                  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1009
1010         for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1011                 if (per_thread ||
1012                     test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1013                         if (thread_data->maps) {
1014                                 thread_data->maps[tm] = &mmap[m];
1015                                 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1016                                           thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1017                         }
1018                         if (thread_data->overwrite_maps) {
1019                                 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1020                                 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1021                                           thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1022                         }
1023                         tm++;
1024                 }
1025         }
1026
1027         return 0;
1028 }
1029
1030 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1031 {
1032         int f, tm, pos;
1033         struct mmap *map, *overwrite_map;
1034
1035         fdarray__init(&thread_data->pollfd, 64);
1036
1037         for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1038                 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1039                 overwrite_map = thread_data->overwrite_maps ?
1040                                 thread_data->overwrite_maps[tm] : NULL;
1041
1042                 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1043                         void *ptr = evlist->core.pollfd.priv[f].ptr;
1044
1045                         if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1046                                 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1047                                                               &evlist->core.pollfd);
1048                                 if (pos < 0)
1049                                         return pos;
1050                                 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1051                                          thread_data, pos, evlist->core.pollfd.entries[f].fd);
1052                         }
1053                 }
1054         }
1055
1056         return 0;
1057 }
1058
1059 static void record__free_thread_data(struct record *rec)
1060 {
1061         int t;
1062         struct record_thread *thread_data = rec->thread_data;
1063
1064         if (thread_data == NULL)
1065                 return;
1066
1067         for (t = 0; t < rec->nr_threads; t++) {
1068                 record__thread_data_close_pipes(&thread_data[t]);
1069                 zfree(&thread_data[t].maps);
1070                 zfree(&thread_data[t].overwrite_maps);
1071                 fdarray__exit(&thread_data[t].pollfd);
1072         }
1073
1074         zfree(&rec->thread_data);
1075 }
1076
1077 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1078 {
1079         int t, ret;
1080         struct record_thread *thread_data;
1081
1082         rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1083         if (!rec->thread_data) {
1084                 pr_err("Failed to allocate thread data\n");
1085                 return -ENOMEM;
1086         }
1087         thread_data = rec->thread_data;
1088
1089         for (t = 0; t < rec->nr_threads; t++)
1090                 record__thread_data_init_pipes(&thread_data[t]);
1091
1092         for (t = 0; t < rec->nr_threads; t++) {
1093                 thread_data[t].rec = rec;
1094                 thread_data[t].mask = &rec->thread_masks[t];
1095                 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1096                 if (ret) {
1097                         pr_err("Failed to initialize thread[%d] maps\n", t);
1098                         goto out_free;
1099                 }
1100                 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1101                 if (ret) {
1102                         pr_err("Failed to initialize thread[%d] pollfd\n", t);
1103                         goto out_free;
1104                 }
1105                 if (t) {
1106                         thread_data[t].tid = -1;
1107                         ret = record__thread_data_open_pipes(&thread_data[t]);
1108                         if (ret) {
1109                                 pr_err("Failed to open thread[%d] communication pipes\n", t);
1110                                 goto out_free;
1111                         }
1112                         ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1113                                            POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1114                         if (ret < 0) {
1115                                 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1116                                 goto out_free;
1117                         }
1118                         thread_data[t].ctlfd_pos = ret;
1119                         pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1120                                  thread_data, thread_data[t].ctlfd_pos,
1121                                  thread_data[t].pipes.msg[0]);
1122                 } else {
1123                         thread_data[t].tid = gettid();
1124                         if (evlist->ctl_fd.pos == -1)
1125                                 continue;
1126                         ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1127                                                       &evlist->core.pollfd);
1128                         if (ret < 0) {
1129                                 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1130                                 goto out_free;
1131                         }
1132                         thread_data[t].ctlfd_pos = ret;
1133                         pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1134                                  thread_data, thread_data[t].ctlfd_pos,
1135                                  evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1136                 }
1137         }
1138
1139         return 0;
1140
1141 out_free:
1142         record__free_thread_data(rec);
1143
1144         return ret;
1145 }
1146
1147 static int record__mmap_evlist(struct record *rec,
1148                                struct evlist *evlist)
1149 {
1150         int i, ret;
1151         struct record_opts *opts = &rec->opts;
1152         bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1153                                   opts->auxtrace_sample_mode;
1154         char msg[512];
1155
1156         if (opts->affinity != PERF_AFFINITY_SYS)
1157                 cpu__setup_cpunode_map();
1158
1159         if (evlist__mmap_ex(evlist, opts->mmap_pages,
1160                                  opts->auxtrace_mmap_pages,
1161                                  auxtrace_overwrite,
1162                                  opts->nr_cblocks, opts->affinity,
1163                                  opts->mmap_flush, opts->comp_level) < 0) {
1164                 if (errno == EPERM) {
1165                         pr_err("Permission error mapping pages.\n"
1166                                "Consider increasing "
1167                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
1168                                "or try again with a smaller value of -m/--mmap_pages.\n"
1169                                "(current value: %u,%u)\n",
1170                                opts->mmap_pages, opts->auxtrace_mmap_pages);
1171                         return -errno;
1172                 } else {
1173                         pr_err("failed to mmap with %d (%s)\n", errno,
1174                                 str_error_r(errno, msg, sizeof(msg)));
1175                         if (errno)
1176                                 return -errno;
1177                         else
1178                                 return -EINVAL;
1179                 }
1180         }
1181
1182         if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1183                 return -1;
1184
1185         ret = record__alloc_thread_data(rec, evlist);
1186         if (ret)
1187                 return ret;
1188
1189         if (record__threads_enabled(rec)) {
1190                 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1191                 if (ret) {
1192                         pr_err("Failed to create data directory: %s\n", strerror(-ret));
1193                         return ret;
1194                 }
1195                 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1196                         if (evlist->mmap)
1197                                 evlist->mmap[i].file = &rec->data.dir.files[i];
1198                         if (evlist->overwrite_mmap)
1199                                 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1200                 }
1201         }
1202
1203         return 0;
1204 }
1205
1206 static int record__mmap(struct record *rec)
1207 {
1208         return record__mmap_evlist(rec, rec->evlist);
1209 }
1210
1211 static int record__open(struct record *rec)
1212 {
1213         char msg[BUFSIZ];
1214         struct evsel *pos;
1215         struct evlist *evlist = rec->evlist;
1216         struct perf_session *session = rec->session;
1217         struct record_opts *opts = &rec->opts;
1218         int rc = 0;
1219
1220         /*
1221          * For initial_delay, system-wide profiling, or a hybrid system, we need to add a
1222          * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1223          * of waiting or event synthesis.
1224          */
1225         if (opts->initial_delay || target__has_cpu(&opts->target) ||
1226             perf_pmu__has_hybrid()) {
1227                 pos = evlist__get_tracking_event(evlist);
1228                 if (!evsel__is_dummy_event(pos)) {
1229                         /* Set up dummy event. */
1230                         if (evlist__add_dummy(evlist))
1231                                 return -ENOMEM;
1232                         pos = evlist__last(evlist);
1233                         evlist__set_tracking_event(evlist, pos);
1234                 }
1235
1236                 /*
1237                  * Enable the dummy event when the process is forked for
1238                  * initial_delay, or immediately for system-wide profiling.
1239                  */
1240                 if (opts->initial_delay && !pos->immediate &&
1241                     !target__has_cpu(&opts->target))
1242                         pos->core.attr.enable_on_exec = 1;
1243                 else
1244                         pos->immediate = 1;
1245         }
1246
1247         evlist__config(evlist, opts, &callchain_param);
1248
1249         evlist__for_each_entry(evlist, pos) {
1250 try_again:
1251                 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1252                         if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1253                                 if (verbose > 0)
1254                                         ui__warning("%s\n", msg);
1255                                 goto try_again;
1256                         }
1257                         if ((errno == EINVAL || errno == EBADF) &&
1258                             pos->core.leader != &pos->core &&
1259                             pos->weak_group) {
1260                                 pos = evlist__reset_weak_group(evlist, pos, true);
1261                                 goto try_again;
1262                         }
1263                         rc = -errno;
1264                         evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1265                         ui__error("%s\n", msg);
1266                         goto out;
1267                 }
1268
1269                 pos->supported = true;
1270         }
1271
1272         if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1273                 pr_warning(
1274 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1275 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1276 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1277 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1278 "Samples in kernel modules won't be resolved at all.\n\n"
1279 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1280 "even with a suitable vmlinux or kallsyms file.\n\n");
1281         }
1282
1283         if (evlist__apply_filters(evlist, &pos)) {
1284                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1285                         pos->filter, evsel__name(pos), errno,
1286                         str_error_r(errno, msg, sizeof(msg)));
1287                 rc = -1;
1288                 goto out;
1289         }
1290
1291         rc = record__mmap(rec);
1292         if (rc)
1293                 goto out;
1294
1295         session->evlist = evlist;
1296         perf_session__set_id_hdr_size(session);
1297 out:
1298         return rc;
1299 }
1300
1301 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1302 {
1303         if (rec->evlist->first_sample_time == 0)
1304                 rec->evlist->first_sample_time = sample_time;
1305
1306         if (sample_time)
1307                 rec->evlist->last_sample_time = sample_time;
1308 }
1309
1310 static int process_sample_event(struct perf_tool *tool,
1311                                 union perf_event *event,
1312                                 struct perf_sample *sample,
1313                                 struct evsel *evsel,
1314                                 struct machine *machine)
1315 {
1316         struct record *rec = container_of(tool, struct record, tool);
1317
1318         set_timestamp_boundary(rec, sample->time);
1319
1320         if (rec->buildid_all)
1321                 return 0;
1322
1323         rec->samples++;
1324         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1325 }
1326
1327 static int process_buildids(struct record *rec)
1328 {
1329         struct perf_session *session = rec->session;
1330
1331         if (perf_data__size(&rec->data) == 0)
1332                 return 0;
1333
1334         /*
1335          * During this process, it'll load the kernel map and replace
1336          * dso->long_name with the real pathname it found.  In this case
1337          * we prefer the vmlinux path like
1338          *   /lib/modules/3.16.4/build/vmlinux
1339          *
1340          * rather than build-id path (in debug directory).
1341          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1342          */
1343         symbol_conf.ignore_vmlinux_buildid = true;
1344
1345         /*
1346          * If --buildid-all is given, it marks all DSOs regardless of hits,
1347          * so there is no need to process samples. But if timestamp_boundary is
1348          * enabled, it still needs to walk all samples to get the timestamps of
1349          * first/last samples.
1350          */
1351         if (rec->buildid_all && !rec->timestamp_boundary)
1352                 rec->tool.sample = NULL;
1353
1354         return perf_session__process_events(session);
1355 }
1356
1357 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1358 {
1359         int err;
1360         struct perf_tool *tool = data;
1361         /*
1362          * As for the guest kernel, when processing the record & report subcommands,
1363          * we arrange the module mmaps prior to the guest kernel mmap and trigger
1364          * a dso preload, because by default guest module symbols are loaded
1365          * from guest kallsyms instead of /lib/modules/XXX/XXX. This avoids
1366          * missing symbols when the first address is in a module instead of
1367          * in the guest kernel.
1368          */
1369         err = perf_event__synthesize_modules(tool, process_synthesized_event,
1370                                              machine);
1371         if (err < 0)
1372                 pr_err("Couldn't record guest kernel [%d]'s reference"
1373                        " relocation symbol.\n", machine->pid);
1374
1375         /*
1376          * We use _stext for the guest kernel because the guest kernel's
1377          * /proc/kallsyms sometimes has no _text.
1378          */
1379         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1380                                                  machine);
1381         if (err < 0)
1382                 pr_err("Couldn't record guest kernel [%d]'s reference"
1383                        " relocation symbol.\n", machine->pid);
1384 }
1385
1386 static struct perf_event_header finished_round_event = {
1387         .size = sizeof(struct perf_event_header),
1388         .type = PERF_RECORD_FINISHED_ROUND,
1389 };
1390
1391 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1392 {
1393         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1394             !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1395                           thread->mask->affinity.nbits)) {
1396                 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1397                 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1398                           map->affinity_mask.bits, thread->mask->affinity.nbits);
1399                 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1400                                         (cpu_set_t *)thread->mask->affinity.bits);
1401                 if (verbose == 2) {
1402                         pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1403                         mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1404                 }
1405         }
1406 }
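
/*
 * Illustrative sketch (not from the perf sources): pinning the calling
 * thread to a single CPU with the same sched_setaffinity() call that
 * record__adjust_affinity() uses above, but with the standard cpu_set_t
 * macros instead of perf's mmap_cpu_mask bitmaps.  Needs _GNU_SOURCE; the
 * helper name is invented.
 */
#if 0
#define _GNU_SOURCE
#include <sched.h>

static int sketch_pin_to_cpu(int cpu)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);

        /* pid 0 means "the calling thread" */
        return sched_setaffinity(0, sizeof(set), &set);
}
#endif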
1407
1408 static size_t process_comp_header(void *record, size_t increment)
1409 {
1410         struct perf_record_compressed *event = record;
1411         size_t size = sizeof(*event);
1412
1413         if (increment) {
1414                 event->header.size += increment;
1415                 return increment;
1416         }
1417
1418         event->header.type = PERF_RECORD_COMPRESSED;
1419         event->header.size = size;
1420
1421         return size;
1422 }
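
/*
 * Illustrative sketch (not from the perf sources): the "reserve a header,
 * then patch its size" pattern that process_comp_header() above implements
 * for PERF_RECORD_COMPRESSED records, shown with a made-up record layout
 * and a plain memcpy() payload instead of the real compression callback.
 */
#if 0
#include <stdint.h>
#include <string.h>

struct sketch_record {
        uint32_t type;
        uint16_t size;                          /* header + payload */
        char     payload[];
};

static size_t sketch_emit_record(void *dst, const void *payload, uint16_t len)
{
        struct sketch_record *rec = dst;

        rec->type = 0x1234;                     /* made-up record type */
        rec->size = sizeof(*rec);               /* header only, for now */

        memcpy(rec->payload, payload, len);     /* stand-in for compression */
        rec->size += len;                       /* patch in the final size */

        return rec->size;
}
#endif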
1423
1424 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1425                             void *dst, size_t dst_size, void *src, size_t src_size)
1426 {
1427         size_t compressed;
1428         size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1429         struct zstd_data *zstd_data = &session->zstd_data;
1430
1431         if (map && map->file)
1432                 zstd_data = &map->zstd_data;
1433
1434         compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1435                                                      max_record_size, process_comp_header);
1436
1437         if (map && map->file) {
1438                 thread->bytes_transferred += src_size;
1439                 thread->bytes_compressed  += compressed;
1440         } else {
1441                 session->bytes_transferred += src_size;
1442                 session->bytes_compressed  += compressed;
1443         }
1444
1445         return compressed;
1446 }
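
/*
 * Illustrative sketch (not from the perf sources): one-shot compression with
 * plain libzstd, as a simpler stand-in for the streaming zstd_data helpers
 * used by zstd_compress() above.  It does not split output into
 * PERF_RECORD_COMPRESSED records; assumes libzstd is available.
 */
#if 0
#include <zstd.h>

static size_t sketch_zstd_compress(void *dst, size_t dst_size,
                                   const void *src, size_t src_size, int level)
{
        /* ZSTD_compressBound(src_size) gives a safe upper bound for dst_size */
        size_t ret = ZSTD_compress(dst, dst_size, src, src_size, level);

        return ZSTD_isError(ret) ? 0 : ret;
}
#endif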
1447
1448 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1449                                     bool overwrite, bool synch)
1450 {
1451         u64 bytes_written = rec->bytes_written;
1452         int i;
1453         int rc = 0;
1454         int nr_mmaps;
1455         struct mmap **maps;
1456         int trace_fd = rec->data.file.fd;
1457         off_t off = 0;
1458
1459         if (!evlist)
1460                 return 0;
1461
1462         nr_mmaps = thread->nr_mmaps;
1463         maps = overwrite ? thread->overwrite_maps : thread->maps;
1464
1465         if (!maps)
1466                 return 0;
1467
1468         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1469                 return 0;
1470
1471         if (record__aio_enabled(rec))
1472                 off = record__aio_get_pos(trace_fd);
1473
1474         for (i = 0; i < nr_mmaps; i++) {
1475                 u64 flush = 0;
1476                 struct mmap *map = maps[i];
1477
1478                 if (map->core.base) {
1479                         record__adjust_affinity(rec, map);
1480                         if (synch) {
1481                                 flush = map->core.flush;
1482                                 map->core.flush = 1;
1483                         }
1484                         if (!record__aio_enabled(rec)) {
1485                                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1486                                         if (synch)
1487                                                 map->core.flush = flush;
1488                                         rc = -1;
1489                                         goto out;
1490                                 }
1491                         } else {
1492                                 if (record__aio_push(rec, map, &off) < 0) {
1493                                         record__aio_set_pos(trace_fd, off);
1494                                         if (synch)
1495                                                 map->core.flush = flush;
1496                                         rc = -1;
1497                                         goto out;
1498                                 }
1499                         }
1500                         if (synch)
1501                                 map->core.flush = flush;
1502                 }
1503
1504                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1505                     !rec->opts.auxtrace_sample_mode &&
1506                     record__auxtrace_mmap_read(rec, map) != 0) {
1507                         rc = -1;
1508                         goto out;
1509                 }
1510         }
1511
1512         if (record__aio_enabled(rec))
1513                 record__aio_set_pos(trace_fd, off);
1514
1515         /*
1516          * Mark the round finished in case we wrote
1517          * at least one event.
1518          *
1519          * No need for round events in directory mode,
1520          * because the per-cpu maps and files already contain
1521          * data sorted by the kernel.
1522          */
1523         if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1524                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1525
1526         if (overwrite)
1527                 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1528 out:
1529         return rc;
1530 }
1531
1532 static int record__mmap_read_all(struct record *rec, bool synch)
1533 {
1534         int err;
1535
1536         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1537         if (err)
1538                 return err;
1539
1540         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1541 }
1542
1543 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1544                                            void *arg __maybe_unused)
1545 {
1546         struct perf_mmap *map = fda->priv[fd].ptr;
1547
1548         if (map)
1549                 perf_mmap__put(map);
1550 }
1551
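/*
 * Body of the additional record threads in parallel (--threads) mode:
 * acknowledge startup on the ack pipe, keep draining the assigned mmaps,
 * poll when idle, and stop once the message pipe reports POLLHUP, doing a
 * final synchronous flush before acknowledging termination.
 */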
1552 static void *record__thread(void *arg)
1553 {
1554         enum thread_msg msg = THREAD_MSG__READY;
1555         bool terminate = false;
1556         struct fdarray *pollfd;
1557         int err, ctlfd_pos;
1558
1559         thread = arg;
1560         thread->tid = gettid();
1561
1562         err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1563         if (err == -1)
1564                 pr_warning("threads[%d]: failed to notify on start: %s\n",
1565                            thread->tid, strerror(errno));
1566
1567         pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1568
1569         pollfd = &thread->pollfd;
1570         ctlfd_pos = thread->ctlfd_pos;
1571
1572         for (;;) {
1573                 unsigned long long hits = thread->samples;
1574
1575                 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1576                         break;
1577
1578                 if (hits == thread->samples) {
1579
1580                         err = fdarray__poll(pollfd, -1);
1581                         /*
1582                          * Propagate the error only if there is one. Ignore a
1583                          * positive number of returned events and EINTR.
1584                          */
1585                         if (err > 0 || (err < 0 && errno == EINTR))
1586                                 err = 0;
1587                         thread->waking++;
1588
1589                         if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1590                                             record__thread_munmap_filtered, NULL) == 0)
1591                                 break;
1592                 }
1593
1594                 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1595                         terminate = true;
1596                         close(thread->pipes.msg[0]);
1597                         thread->pipes.msg[0] = -1;
1598                         pollfd->entries[ctlfd_pos].fd = -1;
1599                         pollfd->entries[ctlfd_pos].events = 0;
1600                 }
1601
1602                 pollfd->entries[ctlfd_pos].revents = 0;
1603         }
1604         record__mmap_read_all(thread->rec, true);
1605
1606         err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1607         if (err == -1)
1608                 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1609                            thread->tid, strerror(errno));
1610
1611         return NULL;
1612 }
1613
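/*
 * Start with all perf.data header features set and clear the ones that do
 * not apply to this particular record session.
 */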
1614 static void record__init_features(struct record *rec)
1615 {
1616         struct perf_session *session = rec->session;
1617         int feat;
1618
1619         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1620                 perf_header__set_feat(&session->header, feat);
1621
1622         if (rec->no_buildid)
1623                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1624
1625         if (!have_tracepoints(&rec->evlist->core.entries))
1626                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1627
1628         if (!rec->opts.branch_stack)
1629                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1630
1631         if (!rec->opts.full_auxtrace)
1632                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1633
1634         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1635                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1636
1637         if (!rec->opts.use_clockid)
1638                 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1639
1640         if (!record__threads_enabled(rec))
1641                 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1642
1643         if (!record__comp_enabled(rec))
1644                 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1645
1646         perf_header__clear_feat(&session->header, HEADER_STAT);
1647 }
1648
1649 static void
1650 record__finish_output(struct record *rec)
1651 {
1652         int i;
1653         struct perf_data *data = &rec->data;
1654         int fd = perf_data__fd(data);
1655
1656         if (data->is_pipe)
1657                 return;
1658
1659         rec->session->header.data_size += rec->bytes_written;
1660         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1661         if (record__threads_enabled(rec)) {
1662                 for (i = 0; i < data->dir.nr; i++)
1663                         data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1664         }
1665
1666         if (!rec->no_buildid) {
1667                 process_buildids(rec);
1668
1669                 if (rec->buildid_all)
1670                         dsos__hit_all(rec->session);
1671         }
1672         perf_session__write_header(rec->session, rec->evlist, fd, true);
1673
1674         return;
1675 }
1676
1677 static int record__synthesize_workload(struct record *rec, bool tail)
1678 {
1679         int err;
1680         struct perf_thread_map *thread_map;
1681         bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1682
1683         if (rec->opts.tail_synthesize != tail)
1684                 return 0;
1685
1686         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1687         if (thread_map == NULL)
1688                 return -1;
1689
1690         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1691                                                  process_synthesized_event,
1692                                                  &rec->session->machines.host,
1693                                                  needs_mmap,
1694                                                  rec->opts.sample_address);
1695         perf_thread_map__put(thread_map);
1696         return err;
1697 }
1698
1699 static int record__synthesize(struct record *rec, bool tail);
1700
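/*
 * Finalize the current output file and switch to a new timestamped one.
 * When switch_output.num_files caps how many files are kept, the oldest
 * one is removed and its slot reused for the new file name.
 */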
1701 static int
1702 record__switch_output(struct record *rec, bool at_exit)
1703 {
1704         struct perf_data *data = &rec->data;
1705         int fd, err;
1706         char *new_filename;
1707
1708         /* Same size as "2015122520103046" */
1709         char timestamp[] = "InvalidTimestamp";
1710
1711         record__aio_mmap_read_sync(rec);
1712
1713         record__synthesize(rec, true);
1714         if (target__none(&rec->opts.target))
1715                 record__synthesize_workload(rec, true);
1716
1717         rec->samples = 0;
1718         record__finish_output(rec);
1719         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1720         if (err) {
1721                 pr_err("Failed to get current timestamp\n");
1722                 return -EINVAL;
1723         }
1724
1725         fd = perf_data__switch(data, timestamp,
1726                                     rec->session->header.data_offset,
1727                                     at_exit, &new_filename);
1728         if (fd >= 0 && !at_exit) {
1729                 rec->bytes_written = 0;
1730                 rec->session->header.data_size = 0;
1731         }
1732
1733         if (!quiet)
1734                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1735                         data->path, timestamp);
1736
1737         if (rec->switch_output.num_files) {
1738                 int n = rec->switch_output.cur_file + 1;
1739
1740                 if (n >= rec->switch_output.num_files)
1741                         n = 0;
1742                 rec->switch_output.cur_file = n;
1743                 if (rec->switch_output.filenames[n]) {
1744                         remove(rec->switch_output.filenames[n]);
1745                         zfree(&rec->switch_output.filenames[n]);
1746                 }
1747                 rec->switch_output.filenames[n] = new_filename;
1748         } else {
1749                 free(new_filename);
1750         }
1751
1752         /* Output tracking events */
1753         if (!at_exit) {
1754                 record__synthesize(rec, false);
1755
1756                 /*
1757                  * In 'perf record --switch-output' without -a,
1758                  * record__synthesize() in record__switch_output() won't
1759                  * generate tracking events because there's no thread_map
1760                  * in evlist, so the newly created perf.data would lack
1761                  * mmap and comm information.
1762                  * Create a fake thread_map and directly call
1763                  * perf_event__synthesize_thread_map() for those events.
1764                  */
1765                 if (target__none(&rec->opts.target))
1766                         record__synthesize_workload(rec, false);
1767         }
1768         return fd;
1769 }
1770
1771 static volatile int workload_exec_errno;
1772
1773 /*
1774  * evlist__prepare_workload will send a SIGUSR1
1775  * if the fork fails, since we asked by setting its
1776  * want_signal to true.
1777  */
1778 static void workload_exec_failed_signal(int signo __maybe_unused,
1779                                         siginfo_t *info,
1780                                         void *ucontext __maybe_unused)
1781 {
1782         workload_exec_errno = info->si_value.sival_int;
1783         done = 1;
1784         child_finished = 1;
1785 }
1786
1787 static void snapshot_sig_handler(int sig);
1788 static void alarm_sig_handler(int sig);
1789
1790 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1791 {
1792         if (evlist) {
1793                 if (evlist->mmap && evlist->mmap[0].core.base)
1794                         return evlist->mmap[0].core.base;
1795                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1796                         return evlist->overwrite_mmap[0].core.base;
1797         }
1798         return NULL;
1799 }
1800
1801 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1802 {
1803         const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1804         if (pc)
1805                 return pc;
1806         return NULL;
1807 }
1808
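/*
 * Emit synthetic events describing state the kernel will not report by
 * itself: time conversion data, the id index and AUX trace info when
 * needed, kernel and module mmaps, extra attributes, thread and cpu maps,
 * BPF and cgroup events, and finally the already running threads of the
 * target, optionally using multiple synthesis threads.
 */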
1809 static int record__synthesize(struct record *rec, bool tail)
1810 {
1811         struct perf_session *session = rec->session;
1812         struct machine *machine = &session->machines.host;
1813         struct perf_data *data = &rec->data;
1814         struct record_opts *opts = &rec->opts;
1815         struct perf_tool *tool = &rec->tool;
1816         int err = 0;
1817         event_op f = process_synthesized_event;
1818
1819         if (rec->opts.tail_synthesize != tail)
1820                 return 0;
1821
1822         if (data->is_pipe) {
1823                 err = perf_event__synthesize_for_pipe(tool, session, data,
1824                                                       process_synthesized_event);
1825                 if (err < 0)
1826                         goto out;
1827
1828                 rec->bytes_written += err;
1829         }
1830
1831         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1832                                           process_synthesized_event, machine);
1833         if (err)
1834                 goto out;
1835
1836         /* Synthesize id_index before auxtrace_info */
1837         if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
1838                 err = perf_event__synthesize_id_index(tool,
1839                                                       process_synthesized_event,
1840                                                       session->evlist, machine);
1841                 if (err)
1842                         goto out;
1843         }
1844
1845         if (rec->opts.full_auxtrace) {
1846                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1847                                         session, process_synthesized_event);
1848                 if (err)
1849                         goto out;
1850         }
1851
1852         if (!evlist__exclude_kernel(rec->evlist)) {
1853                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1854                                                          machine);
1855                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1856                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1857                                    "Check /proc/kallsyms permission or run as root.\n");
1858
1859                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1860                                                      machine);
1861                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1862                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1863                                    "Check /proc/modules permission or run as root.\n");
1864         }
1865
1866         if (perf_guest) {
1867                 machines__process_guests(&session->machines,
1868                                          perf_event__synthesize_guest_os, tool);
1869         }
1870
1871         err = perf_event__synthesize_extra_attr(&rec->tool,
1872                                                 rec->evlist,
1873                                                 process_synthesized_event,
1874                                                 data->is_pipe);
1875         if (err)
1876                 goto out;
1877
1878         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1879                                                  process_synthesized_event,
1880                                                 NULL);
1881         if (err < 0) {
1882                 pr_err("Couldn't synthesize thread map.\n");
1883                 return err;
1884         }
1885
1886         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
1887                                              process_synthesized_event, NULL);
1888         if (err < 0) {
1889                 pr_err("Couldn't synthesize cpu map.\n");
1890                 return err;
1891         }
1892
1893         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1894                                                 machine, opts);
1895         if (err < 0)
1896                 pr_warning("Couldn't synthesize bpf events.\n");
1897
1898         if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1899                 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1900                                                      machine);
1901                 if (err < 0)
1902                         pr_warning("Couldn't synthesize cgroup events.\n");
1903         }
1904
1905         if (rec->opts.nr_threads_synthesize > 1) {
1906                 perf_set_multithreaded();
1907                 f = process_locked_synthesized_event;
1908         }
1909
1910         if (rec->opts.synth & PERF_SYNTH_TASK) {
1911                 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1912
1913                 err = __machine__synthesize_threads(machine, tool, &opts->target,
1914                                                     rec->evlist->core.threads,
1915                                                     f, needs_mmap, opts->sample_address,
1916                                                     rec->opts.nr_threads_synthesize);
1917         }
1918
1919         if (rec->opts.nr_threads_synthesize > 1)
1920                 perf_set_singlethreaded();
1921
1922 out:
1923         return err;
1924 }
1925
1926 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1927 {
1928         struct record *rec = data;
1929         pthread_kill(rec->thread_id, SIGUSR2);
1930         return 0;
1931 }
1932
1933 static int record__setup_sb_evlist(struct record *rec)
1934 {
1935         struct record_opts *opts = &rec->opts;
1936
1937         if (rec->sb_evlist != NULL) {
1938                 /*
1939                  * We get here if --switch-output-event populated the
1940                  * sb_evlist, so associate a callback that will send a SIGUSR2
1941                  * to the main thread.
1942                  */
1943                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1944                 rec->thread_id = pthread_self();
1945         }
1946 #ifdef HAVE_LIBBPF_SUPPORT
1947         if (!opts->no_bpf_event) {
1948                 if (rec->sb_evlist == NULL) {
1949                         rec->sb_evlist = evlist__new();
1950
1951                         if (rec->sb_evlist == NULL) {
1952                                 pr_err("Couldn't create side band evlist.\n");
1953                                 return -1;
1954                         }
1955                 }
1956
1957                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1958                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1959                         return -1;
1960                 }
1961         }
1962 #endif
1963         if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1964                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1965                 opts->no_bpf_event = true;
1966         }
1967
1968         return 0;
1969 }
1970
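/*
 * With -k/--clockid, store a reference pair of gettimeofday() and
 * clock_gettime() readings in the header so that sample timestamps can
 * later be related to wall clock time.
 */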
1971 static int record__init_clock(struct record *rec)
1972 {
1973         struct perf_session *session = rec->session;
1974         struct timespec ref_clockid;
1975         struct timeval ref_tod;
1976         u64 ref;
1977
1978         if (!rec->opts.use_clockid)
1979                 return 0;
1980
1981         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1982                 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1983
1984         session->header.env.clock.clockid = rec->opts.clockid;
1985
1986         if (gettimeofday(&ref_tod, NULL) != 0) {
1987                 pr_err("gettimeofday failed, cannot set reference time.\n");
1988                 return -1;
1989         }
1990
1991         if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1992                 pr_err("clock_gettime failed, cannot set reference time.\n");
1993                 return -1;
1994         }
1995
1996         ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1997               (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1998
1999         session->header.env.clock.tod_ns = ref;
2000
2001         ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2002               (u64) ref_clockid.tv_nsec;
2003
2004         session->header.env.clock.clockid_ns = ref;
2005         return 0;
2006 }
2007
2008 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2009 {
2010         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2011                 trigger_hit(&auxtrace_snapshot_trigger);
2012                 auxtrace_record__snapshot_started = 1;
2013                 if (auxtrace_record__snapshot_start(rec->itr))
2014                         trigger_error(&auxtrace_snapshot_trigger);
2015         }
2016 }
2017
2018 static void record__uniquify_name(struct record *rec)
2019 {
2020         struct evsel *pos;
2021         struct evlist *evlist = rec->evlist;
2022         char *new_name;
2023         int ret;
2024
2025         if (!perf_pmu__has_hybrid())
2026                 return;
2027
2028         evlist__for_each_entry(evlist, pos) {
2029                 if (!evsel__is_hybrid(pos))
2030                         continue;
2031
2032                 if (strchr(pos->name, '/'))
2033                         continue;
2034
2035                 ret = asprintf(&new_name, "%s/%s/",
2036                                pos->pmu_name, pos->name);
2037                 if (ret >= 0) {
2038                         free(pos->name);
2039                         pos->name = new_name;
2040                 }
2041         }
2042 }
2043
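/*
 * Ask a record thread to terminate by closing the write end of its message
 * pipe and wait for the acknowledgement on its ack pipe.
 */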
2044 static int record__terminate_thread(struct record_thread *thread_data)
2045 {
2046         int err;
2047         enum thread_msg ack = THREAD_MSG__UNDEFINED;
2048         pid_t tid = thread_data->tid;
2049
2050         close(thread_data->pipes.msg[1]);
2051         thread_data->pipes.msg[1] = -1;
2052         err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2053         if (err > 0)
2054                 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2055         else
2056                 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2057                            thread->tid, tid);
2058
2059         return 0;
2060 }
2061
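/*
 * Spawn the additional record threads in parallel mode with all signals
 * blocked, so only the main thread handles them, pin each new thread to its
 * configured affinity mask where supported, and wait for each startup
 * acknowledgement before moving on.
 */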
2062 static int record__start_threads(struct record *rec)
2063 {
2064         int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2065         struct record_thread *thread_data = rec->thread_data;
2066         sigset_t full, mask;
2067         pthread_t handle;
2068         pthread_attr_t attrs;
2069
2070         thread = &thread_data[0];
2071
2072         if (!record__threads_enabled(rec))
2073                 return 0;
2074
2075         sigfillset(&full);
2076         if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2077                 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2078                 return -1;
2079         }
2080
2081         pthread_attr_init(&attrs);
2082         pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2083
2084         for (t = 1; t < nr_threads; t++) {
2085                 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2086
2087 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2088                 pthread_attr_setaffinity_np(&attrs,
2089                                             MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2090                                             (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2091 #endif
2092                 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2093                         for (tt = 1; tt < t; tt++)
2094                                 record__terminate_thread(&thread_data[tt]);
2095                         pr_err("Failed to start threads: %s\n", strerror(errno));
2096                         ret = -1;
2097                         goto out_err;
2098                 }
2099
2100                 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2101                 if (err > 0)
2102                         pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2103                                   thread_msg_tags[msg]);
2104                 else
2105                         pr_warning("threads[%d]: failed to receive start notification from %d\n",
2106                                    thread->tid, rec->thread_data[t].tid);
2107         }
2108
2109         sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2110                         (cpu_set_t *)thread->mask->affinity.bits);
2111
2112         pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2113
2114 out_err:
2115         pthread_attr_destroy(&attrs);
2116
2117         if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2118                 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2119                 ret = -1;
2120         }
2121
2122         return ret;
2123 }
2124
2125 static int record__stop_threads(struct record *rec)
2126 {
2127         int t;
2128         struct record_thread *thread_data = rec->thread_data;
2129
2130         for (t = 1; t < rec->nr_threads; t++)
2131                 record__terminate_thread(&thread_data[t]);
2132
2133         for (t = 0; t < rec->nr_threads; t++) {
2134                 rec->samples += thread_data[t].samples;
2135                 if (!record__threads_enabled(rec))
2136                         continue;
2137                 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2138                 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2139                 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2140                          thread_data[t].samples, thread_data[t].waking);
2141                 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2142                         pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2143                                  thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2144                 else
2145                         pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2146         }
2147
2148         return 0;
2149 }
2150
2151 static unsigned long record__waking(struct record *rec)
2152 {
2153         int t;
2154         unsigned long waking = 0;
2155         struct record_thread *thread_data = rec->thread_data;
2156
2157         for (t = 0; t < rec->nr_threads; t++)
2158                 waking += thread_data[t].waking;
2159
2160         return waking;
2161 }
2162
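/*
 * Main body of 'perf record': set up the session, signal handling and
 * output, optionally fork the workload, open and mmap the events, then loop
 * draining the ring buffers until the workload exits or recording is
 * stopped, and finally finish or switch the output file.
 */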
2163 static int __cmd_record(struct record *rec, int argc, const char **argv)
2164 {
2165         int err;
2166         int status = 0;
2167         const bool forks = argc > 0;
2168         struct perf_tool *tool = &rec->tool;
2169         struct record_opts *opts = &rec->opts;
2170         struct perf_data *data = &rec->data;
2171         struct perf_session *session;
2172         bool disabled = false, draining = false;
2173         int fd;
2174         float ratio = 0;
2175         enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2176
2177         atexit(record__sig_exit);
2178         signal(SIGCHLD, sig_handler);
2179         signal(SIGINT, sig_handler);
2180         signal(SIGTERM, sig_handler);
2181         signal(SIGSEGV, sigsegv_handler);
2182
2183         if (rec->opts.record_namespaces)
2184                 tool->namespace_events = true;
2185
2186         if (rec->opts.record_cgroup) {
2187 #ifdef HAVE_FILE_HANDLE
2188                 tool->cgroup_events = true;
2189 #else
2190                 pr_err("cgroup tracking is not supported\n");
2191                 return -1;
2192 #endif
2193         }
2194
2195         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2196                 signal(SIGUSR2, snapshot_sig_handler);
2197                 if (rec->opts.auxtrace_snapshot_mode)
2198                         trigger_on(&auxtrace_snapshot_trigger);
2199                 if (rec->switch_output.enabled)
2200                         trigger_on(&switch_output_trigger);
2201         } else {
2202                 signal(SIGUSR2, SIG_IGN);
2203         }
2204
2205         session = perf_session__new(data, tool);
2206         if (IS_ERR(session)) {
2207                 pr_err("Perf session creation failed.\n");
2208                 return PTR_ERR(session);
2209         }
2210
2211         if (record__threads_enabled(rec)) {
2212                 if (perf_data__is_pipe(&rec->data)) {
2213                         pr_err("Parallel trace streaming is not available in pipe mode.\n");
2214                         return -1;
2215                 }
2216                 if (rec->opts.full_auxtrace) {
2217                         pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2218                         return -1;
2219                 }
2220         }
2221
2222         fd = perf_data__fd(data);
2223         rec->session = session;
2224
2225         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2226                 pr_err("Compression initialization failed.\n");
2227                 return -1;
2228         }
2229 #ifdef HAVE_EVENTFD_SUPPORT
2230         done_fd = eventfd(0, EFD_NONBLOCK);
2231         if (done_fd < 0) {
2232                 pr_err("Failed to create wakeup eventfd, error: %m\n");
2233                 status = -1;
2234                 goto out_delete_session;
2235         }
2236         err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2237         if (err < 0) {
2238                 pr_err("Failed to add wakeup eventfd to poll list\n");
2239                 status = err;
2240                 goto out_delete_session;
2241         }
2242 #endif // HAVE_EVENTFD_SUPPORT
2243
2244         session->header.env.comp_type  = PERF_COMP_ZSTD;
2245         session->header.env.comp_level = rec->opts.comp_level;
2246
2247         if (rec->opts.kcore &&
2248             !record__kcore_readable(&session->machines.host)) {
2249                 pr_err("ERROR: kcore is not readable.\n");
2250                 return -1;
2251         }
2252
2253         if (record__init_clock(rec))
2254                 return -1;
2255
2256         record__init_features(rec);
2257
2258         if (forks) {
2259                 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2260                                                workload_exec_failed_signal);
2261                 if (err < 0) {
2262                         pr_err("Couldn't run the workload!\n");
2263                         status = err;
2264                         goto out_delete_session;
2265                 }
2266         }
2267
2268         /*
2269          * If we have just a single event and are sending data
2270          * through a pipe, we need to force the ids allocation,
2271          * because we synthesize event name through the pipe
2272          * and need the id for that.
2273          */
2274         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2275                 rec->opts.sample_id = true;
2276
2277         record__uniquify_name(rec);
2278
2279         if (record__open(rec) != 0) {
2280                 err = -1;
2281                 goto out_free_threads;
2282         }
2283         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2284
2285         if (rec->opts.kcore) {
2286                 err = record__kcore_copy(&session->machines.host, data);
2287                 if (err) {
2288                         pr_err("ERROR: Failed to copy kcore\n");
2289                         goto out_free_threads;
2290                 }
2291         }
2292
2293         err = bpf__apply_obj_config();
2294         if (err) {
2295                 char errbuf[BUFSIZ];
2296
2297                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2298                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2299                          errbuf);
2300                 goto out_free_threads;
2301         }
2302
2303         /*
2304          * Normally perf_session__new would do this, but it doesn't have the
2305          * evlist.
2306          */
2307         if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2308                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2309                 rec->tool.ordered_events = false;
2310         }
2311
2312         if (!rec->evlist->core.nr_groups)
2313                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2314
2315         if (data->is_pipe) {
2316                 err = perf_header__write_pipe(fd);
2317                 if (err < 0)
2318                         goto out_free_threads;
2319         } else {
2320                 err = perf_session__write_header(session, rec->evlist, fd, false);
2321                 if (err < 0)
2322                         goto out_free_threads;
2323         }
2324
2325         err = -1;
2326         if (!rec->no_buildid
2327             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2328                 pr_err("Couldn't generate buildids. "
2329                        "Use --no-buildid to profile anyway.\n");
2330                 goto out_free_threads;
2331         }
2332
2333         err = record__setup_sb_evlist(rec);
2334         if (err)
2335                 goto out_free_threads;
2336
2337         err = record__synthesize(rec, false);
2338         if (err < 0)
2339                 goto out_free_threads;
2340
2341         if (rec->realtime_prio) {
2342                 struct sched_param param;
2343
2344                 param.sched_priority = rec->realtime_prio;
2345                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2346                         pr_err("Could not set realtime priority.\n");
2347                         err = -1;
2348                         goto out_free_threads;
2349                 }
2350         }
2351
2352         if (record__start_threads(rec))
2353                 goto out_free_threads;
2354
2355         /*
2356          * When perf is starting the traced process, all the events
2357          * (apart from group members) have enable_on_exec=1 set,
2358          * so don't spoil it by prematurely enabling them.
2359          */
2360         if (!target__none(&opts->target) && !opts->initial_delay)
2361                 evlist__enable(rec->evlist);
2362
2363         /*
2364          * Let the child rip
2365          */
2366         if (forks) {
2367                 struct machine *machine = &session->machines.host;
2368                 union perf_event *event;
2369                 pid_t tgid;
2370
2371                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2372                 if (event == NULL) {
2373                         err = -ENOMEM;
2374                         goto out_child;
2375                 }
2376
2377                 /*
2378                  * Some H/W events are generated before the COMM event,
2379                  * which is emitted during exec(), so perf script
2380                  * cannot see a correct process name for those events.
2381                  * Synthesize a COMM event up front to prevent that.
2382                  */
2383                 tgid = perf_event__synthesize_comm(tool, event,
2384                                                    rec->evlist->workload.pid,
2385                                                    process_synthesized_event,
2386                                                    machine);
2387                 free(event);
2388
2389                 if (tgid == -1)
2390                         goto out_child;
2391
2392                 event = malloc(sizeof(event->namespaces) +
2393                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2394                                machine->id_hdr_size);
2395                 if (event == NULL) {
2396                         err = -ENOMEM;
2397                         goto out_child;
2398                 }
2399
2400                 /*
2401                  * Synthesize NAMESPACES event for the command specified.
2402                  */
2403                 perf_event__synthesize_namespaces(tool, event,
2404                                                   rec->evlist->workload.pid,
2405                                                   tgid, process_synthesized_event,
2406                                                   machine);
2407                 free(event);
2408
2409                 evlist__start_workload(rec->evlist);
2410         }
2411
2412         if (opts->initial_delay) {
2413                 pr_info(EVLIST_DISABLED_MSG);
2414                 if (opts->initial_delay > 0) {
2415                         usleep(opts->initial_delay * USEC_PER_MSEC);
2416                         evlist__enable(rec->evlist);
2417                         pr_info(EVLIST_ENABLED_MSG);
2418                 }
2419         }
2420
2421         trigger_ready(&auxtrace_snapshot_trigger);
2422         trigger_ready(&switch_output_trigger);
2423         perf_hooks__invoke_record_start();
2424         for (;;) {
2425                 unsigned long long hits = thread->samples;
2426
2427                 /*
2428                  * rec->evlist->bkw_mmap_state may be
2429                  * BKW_MMAP_EMPTY here: when done == true and
2430                  * hits != rec->samples in previous round.
2431                  *
2432                  * evlist__toggle_bkw_mmap() ensures we never
2433                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2434                  */
2435                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2436                         evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2437
2438                 if (record__mmap_read_all(rec, false) < 0) {
2439                         trigger_error(&auxtrace_snapshot_trigger);
2440                         trigger_error(&switch_output_trigger);
2441                         err = -1;
2442                         goto out_child;
2443                 }
2444
2445                 if (auxtrace_record__snapshot_started) {
2446                         auxtrace_record__snapshot_started = 0;
2447                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
2448                                 record__read_auxtrace_snapshot(rec, false);
2449                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2450                                 pr_err("AUX area tracing snapshot failed\n");
2451                                 err = -1;
2452                                 goto out_child;
2453                         }
2454                 }
2455
2456                 if (trigger_is_hit(&switch_output_trigger)) {
2457                         /*
2458                          * If switch_output_trigger is hit, the data in
2459                          * overwritable ring buffer should have been collected,
2460                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2461                          *
2462                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
2463                          * record__mmap_read_all() didn't collect data from
2464                          * overwritable ring buffer. Read again.
2465                          */
2466                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2467                                 continue;
2468                         trigger_ready(&switch_output_trigger);
2469
2470                         /*
2471                          * Reenable events in overwrite ring buffer after
2472                          * record__mmap_read_all(): we should have collected
2473                          * data from it.
2474                          */
2475                         evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2476
2477                         if (!quiet)
2478                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2479                                         record__waking(rec));
2480                         thread->waking = 0;
2481                         fd = record__switch_output(rec, false);
2482                         if (fd < 0) {
2483                                 pr_err("Failed to switch to new file\n");
2484                                 trigger_error(&switch_output_trigger);
2485                                 err = fd;
2486                                 goto out_child;
2487                         }
2488
2489                         /* re-arm the alarm */
2490                         if (rec->switch_output.time)
2491                                 alarm(rec->switch_output.time);
2492                 }
2493
2494                 if (hits == thread->samples) {
2495                         if (done || draining)
2496                                 break;
2497                         err = fdarray__poll(&thread->pollfd, -1);
2498                         /*
2499                          * Propagate the error only if there is one. Ignore a
2500                          * positive number of returned events and EINTR.
2501                          */
2502                         if (err > 0 || (err < 0 && errno == EINTR))
2503                                 err = 0;
2504                         thread->waking++;
2505
2506                         if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2507                                             record__thread_munmap_filtered, NULL) == 0)
2508                                 draining = true;
2509
2510                         evlist__ctlfd_update(rec->evlist,
2511                                 &thread->pollfd.entries[thread->ctlfd_pos]);
2512                 }
2513
2514                 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2515                         switch (cmd) {
2516                         case EVLIST_CTL_CMD_SNAPSHOT:
2517                                 hit_auxtrace_snapshot_trigger(rec);
2518                                 evlist__ctlfd_ack(rec->evlist);
2519                                 break;
2520                         case EVLIST_CTL_CMD_STOP:
2521                                 done = 1;
2522                                 break;
2523                         case EVLIST_CTL_CMD_ACK:
2524                         case EVLIST_CTL_CMD_UNSUPPORTED:
2525                         case EVLIST_CTL_CMD_ENABLE:
2526                         case EVLIST_CTL_CMD_DISABLE:
2527                         case EVLIST_CTL_CMD_EVLIST:
2528                         case EVLIST_CTL_CMD_PING:
2529                         default:
2530                                 break;
2531                         }
2532                 }
2533
2534                 /*
2535                  * When perf is starting the traced process, the events die
2536                  * with the process at the end and we wait for that. Thus no need to
2537                  * disable events in this case.
2538                  */
2539                 if (done && !disabled && !target__none(&opts->target)) {
2540                         trigger_off(&auxtrace_snapshot_trigger);
2541                         evlist__disable(rec->evlist);
2542                         disabled = true;
2543                 }
2544         }
2545
2546         trigger_off(&auxtrace_snapshot_trigger);
2547         trigger_off(&switch_output_trigger);
2548
2549         if (opts->auxtrace_snapshot_on_exit)
2550                 record__auxtrace_snapshot_exit(rec);
2551
2552         if (forks && workload_exec_errno) {
2553                 char msg[STRERR_BUFSIZE], strevsels[2048];
2554                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2555
2556                 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2557
2558                 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2559                         strevsels, argv[0], emsg);
2560                 err = -1;
2561                 goto out_child;
2562         }
2563
2564         if (!quiet)
2565                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2566                         record__waking(rec));
2567
2568         if (target__none(&rec->opts.target))
2569                 record__synthesize_workload(rec, true);
2570
2571 out_child:
2572         record__stop_threads(rec);
2573         record__mmap_read_all(rec, true);
2574 out_free_threads:
2575         record__free_thread_data(rec);
2576         evlist__finalize_ctlfd(rec->evlist);
2577         record__aio_mmap_read_sync(rec);
2578
2579         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2580                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2581                 session->header.env.comp_ratio = ratio + 0.5;
2582         }
2583
2584         if (forks) {
2585                 int exit_status;
2586
2587                 if (!child_finished)
2588                         kill(rec->evlist->workload.pid, SIGTERM);
2589
2590                 wait(&exit_status);
2591
2592                 if (err < 0)
2593                         status = err;
2594                 else if (WIFEXITED(exit_status))
2595                         status = WEXITSTATUS(exit_status);
2596                 else if (WIFSIGNALED(exit_status))
2597                         signr = WTERMSIG(exit_status);
2598         } else
2599                 status = err;
2600
2601         if (rec->off_cpu)
2602                 rec->bytes_written += off_cpu_write(rec->session);
2603
2604         record__synthesize(rec, true);
2605         /* this will be recalculated during process_buildids() */
2606         rec->samples = 0;
2607
2608         if (!err) {
2609                 if (!rec->timestamp_filename) {
2610                         record__finish_output(rec);
2611                 } else {
2612                         fd = record__switch_output(rec, true);
2613                         if (fd < 0) {
2614                                 status = fd;
2615                                 goto out_delete_session;
2616                         }
2617                 }
2618         }
2619
2620         perf_hooks__invoke_record_end();
2621
2622         if (!err && !quiet) {
2623                 char samples[128];
2624                 const char *postfix = rec->timestamp_filename ?
2625                                         ".<timestamp>" : "";
2626
2627                 if (rec->samples && !rec->opts.full_auxtrace)
2628                         scnprintf(samples, sizeof(samples),
2629                                   " (%" PRIu64 " samples)", rec->samples);
2630                 else
2631                         samples[0] = '\0';
2632
2633                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2634                         perf_data__size(data) / 1024.0 / 1024.0,
2635                         data->path, postfix, samples);
2636                 if (ratio) {
2637                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2638                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
2639                                         ratio);
2640                 }
2641                 fprintf(stderr, " ]\n");
2642         }
2643
2644 out_delete_session:
2645 #ifdef HAVE_EVENTFD_SUPPORT
2646         if (done_fd >= 0)
2647                 close(done_fd);
2648 #endif
2649         zstd_fini(&session->zstd_data);
2650         perf_session__delete(session);
2651
2652         if (!opts->no_bpf_event)
2653                 evlist__stop_sb_thread(rec->sb_evlist);
2654         return status;
2655 }
2656
2657 static void callchain_debug(struct callchain_param *callchain)
2658 {
2659         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2660
2661         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2662
2663         if (callchain->record_mode == CALLCHAIN_DWARF)
2664                 pr_debug("callchain: stack dump size %d\n",
2665                          callchain->dump_size);
2666 }
2667
2668 int record_opts__parse_callchain(struct record_opts *record,
2669                                  struct callchain_param *callchain,
2670                                  const char *arg, bool unset)
2671 {
2672         int ret;
2673         callchain->enabled = !unset;
2674
2675         /* --no-call-graph */
2676         if (unset) {
2677                 callchain->record_mode = CALLCHAIN_NONE;
2678                 pr_debug("callchain: disabled\n");
2679                 return 0;
2680         }
2681
2682         ret = parse_callchain_record_opt(arg, callchain);
2683         if (!ret) {
2684                 /* Enable data address sampling for DWARF unwind. */
2685                 if (callchain->record_mode == CALLCHAIN_DWARF)
2686                         record->sample_address = true;
2687                 callchain_debug(callchain);
2688         }
2689
2690         return ret;
2691 }
2692
2693 int record_parse_callchain_opt(const struct option *opt,
2694                                const char *arg,
2695                                int unset)
2696 {
2697         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2698 }
2699
2700 int record_callchain_opt(const struct option *opt,
2701                          const char *arg __maybe_unused,
2702                          int unset __maybe_unused)
2703 {
2704         struct callchain_param *callchain = opt->value;
2705
2706         callchain->enabled = true;
2707
2708         if (callchain->record_mode == CALLCHAIN_NONE)
2709                 callchain->record_mode = CALLCHAIN_FP;
2710
2711         callchain_debug(callchain);
2712         return 0;
2713 }
2714
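/*
 * Handle perfconfig keys for record: record.build-id (cache, no-cache, skip
 * or mmap), record.call-graph (forwarded as call-graph.record-mode),
 * record.aio (number of AIO control blocks, when AIO support is built in)
 * and record.debuginfod (debuginfod server URLs).
 */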
2715 static int perf_record_config(const char *var, const char *value, void *cb)
2716 {
2717         struct record *rec = cb;
2718
2719         if (!strcmp(var, "record.build-id")) {
2720                 if (!strcmp(value, "cache"))
2721                         rec->no_buildid_cache = false;
2722                 else if (!strcmp(value, "no-cache"))
2723                         rec->no_buildid_cache = true;
2724                 else if (!strcmp(value, "skip"))
2725                         rec->no_buildid = true;
2726                 else if (!strcmp(value, "mmap"))
2727                         rec->buildid_mmap = true;
2728                 else
2729                         return -1;
2730                 return 0;
2731         }
2732         if (!strcmp(var, "record.call-graph")) {
2733                 var = "call-graph.record-mode";
2734                 return perf_default_config(var, value, cb);
2735         }
2736 #ifdef HAVE_AIO_SUPPORT
2737         if (!strcmp(var, "record.aio")) {
2738                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2739                 if (!rec->opts.nr_cblocks)
2740                         rec->opts.nr_cblocks = nr_cblocks_default;
2741         }
2742 #endif
2743         if (!strcmp(var, "record.debuginfod")) {
2744                 rec->debuginfod.urls = strdup(value);
2745                 if (!rec->debuginfod.urls)
2746                         return -ENOMEM;
2747                 rec->debuginfod.set = true;
2748         }
2749
2750         return 0;
2751 }
2752
2753
2754 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2755 {
2756         struct record_opts *opts = (struct record_opts *)opt->value;
2757
2758         if (unset || !str)
2759                 return 0;
2760
2761         if (!strcasecmp(str, "node"))
2762                 opts->affinity = PERF_AFFINITY_NODE;
2763         else if (!strcasecmp(str, "cpu"))
2764                 opts->affinity = PERF_AFFINITY_CPU;
2765
2766         return 0;
2767 }
2768
2769 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2770 {
2771         mask->nbits = nr_bits;
2772         mask->bits = bitmap_zalloc(mask->nbits);
2773         if (!mask->bits)
2774                 return -ENOMEM;
2775
2776         return 0;
2777 }
2778
2779 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2780 {
2781         bitmap_free(mask->bits);
2782         mask->nbits = 0;
2783 }
2784
2785 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2786 {
2787         int ret;
2788
2789         ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2790         if (ret) {
2791                 mask->affinity.bits = NULL;
2792                 return ret;
2793         }
2794
2795         ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2796         if (ret) {
2797                 record__mmap_cpu_mask_free(&mask->maps);
2798                 mask->maps.bits = NULL;
2799         }
2800
2801         return ret;
2802 }
2803
2804 static void record__thread_mask_free(struct thread_mask *mask)
2805 {
2806         record__mmap_cpu_mask_free(&mask->maps);
2807         record__mmap_cpu_mask_free(&mask->affinity);
2808 }
2809
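/*
 * Parse the argument of the parallel streaming (--threads) option: an empty
 * value defaults to THREAD_SPEC__CPU, a string matching one of the
 * thread_spec_tags[] entries selects that predefined grouping, and anything
 * else is kept verbatim as a user supplied spec (THREAD_SPEC__USER).
 */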
2810 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2811 {
2812         int s;
2813         struct record_opts *opts = opt->value;
2814
2815         if (unset || !str || !strlen(str)) {
2816                 opts->threads_spec = THREAD_SPEC__CPU;
2817         } else {
2818                 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2819                         if (s == THREAD_SPEC__USER) {
2820                                 opts->threads_user_spec = strdup(str);
2821                                 if (!opts->threads_user_spec)
2822                                         return -ENOMEM;
2823                                 opts->threads_spec = THREAD_SPEC__USER;
2824                                 break;
2825                         }
2826                         if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2827                                 opts->threads_spec = s;
2828                                 break;
2829                         }
2830                 }
2831         }
2832
2833         if (opts->threads_spec == THREAD_SPEC__USER)
2834                 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2835         else
2836                 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2837
2838         return 0;
2839 }
2840
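/*
 * Parse a human readable output size limit with a B/K/M/G suffix,
 * e.g. "--max-size=200M"; an unset option clears the limit.
 */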
2841 static int parse_output_max_size(const struct option *opt,
2842                                  const char *str, int unset)
2843 {
2844         unsigned long *s = (unsigned long *)opt->value;
2845         static struct parse_tag tags_size[] = {
2846                 { .tag  = 'B', .mult = 1       },
2847                 { .tag  = 'K', .mult = 1 << 10 },
2848                 { .tag  = 'M', .mult = 1 << 20 },
2849                 { .tag  = 'G', .mult = 1 << 30 },
2850                 { .tag  = 0 },
2851         };
2852         unsigned long val;
2853
2854         if (unset) {
2855                 *s = 0;
2856                 return 0;
2857         }
2858
2859         val = parse_tag_value(str, tags_size);
2860         if (val != (unsigned long) -1) {
2861                 *s = val;
2862                 return 0;
2863         }
2864
2865         return -1;
2866 }
2867
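/*
 * Parse the -m/--mmap-pages argument. An optional second, comma separated
 * value sets the AUX area mmap size, e.g. "-m 512,64K" for 512 data pages
 * and a 64K AUX area buffer.
 */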
2868 static int record__parse_mmap_pages(const struct option *opt,
2869                                     const char *str,
2870                                     int unset __maybe_unused)
2871 {
2872         struct record_opts *opts = opt->value;
2873         char *s, *p;
2874         unsigned int mmap_pages;
2875         int ret;
2876
2877         if (!str)
2878                 return -EINVAL;
2879
2880         s = strdup(str);
2881         if (!s)
2882                 return -ENOMEM;
2883
2884         p = strchr(s, ',');
2885         if (p)
2886                 *p = '\0';
2887
2888         if (*s) {
2889                 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2890                 if (ret)
2891                         goto out_free;
2892                 opts->mmap_pages = mmap_pages;
2893         }
2894
2895         if (!p) {
2896                 ret = 0;
2897                 goto out_free;
2898         }
2899
2900         ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2901         if (ret)
2902                 goto out_free;
2903
2904         opts->auxtrace_mmap_pages = mmap_pages;
2905
2906 out_free:
2907         free(s);
2908         return ret;
2909 }
2910
2911 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2912 {
2913 }
2914
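/*
 * Parse --control.  As described in the option table below, the argument is
 * either pre-opened descriptors ("fd:ctl-fd[,ack-fd]") or FIFO path names
 * ("fifo:ctl-fifo[,ack-fifo]"), e.g. "--control=fifo:perf.ctl,perf.ack"; the
 * actual decoding and FIFO setup is done by evlist__parse_control().
 */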
2915 static int parse_control_option(const struct option *opt,
2916                                 const char *str,
2917                                 int unset __maybe_unused)
2918 {
2919         struct record_opts *opts = opt->value;
2920
2921         return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2922 }
2923
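/*
 * Warn when the --switch-output size threshold is smaller than half of the
 * evlist mmap size: data is flushed in chunks of roughly that size, so such
 * a threshold will typically be overshot and the resulting perf.data files
 * come out bigger than requested.
 */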
2924 static void switch_output_size_warn(struct record *rec)
2925 {
2926         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2927         struct switch_output *s = &rec->switch_output;
2928
2929         wakeup_size /= 2;
2930
2931         if (s->size < wakeup_size) {
2932                 char buf[100];
2933
2934                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2935                 pr_warning("WARNING: switch-output data size is lower than "
2936                            "the wakeup kernel buffer size (%s), "
2937                            "expect bigger perf.data sizes\n", buf);
2938         }
2939 }
2940
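/*
 * Configure --switch-output from its string argument.  Accepted forms are
 * "signal" (rotate the output on SIGUSR2), a size with a B/K/M/G suffix
 * (e.g. "10M") or a time with an s/m/h/d suffix (e.g. "30s"); any of them
 * also turns on timestamped output file names.
 */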
2941 static int switch_output_setup(struct record *rec)
2942 {
2943         struct switch_output *s = &rec->switch_output;
2944         static struct parse_tag tags_size[] = {
2945                 { .tag  = 'B', .mult = 1       },
2946                 { .tag  = 'K', .mult = 1 << 10 },
2947                 { .tag  = 'M', .mult = 1 << 20 },
2948                 { .tag  = 'G', .mult = 1 << 30 },
2949                 { .tag  = 0 },
2950         };
2951         static struct parse_tag tags_time[] = {
2952                 { .tag  = 's', .mult = 1        },
2953                 { .tag  = 'm', .mult = 60       },
2954                 { .tag  = 'h', .mult = 60*60    },
2955                 { .tag  = 'd', .mult = 60*60*24 },
2956                 { .tag  = 0 },
2957         };
2958         unsigned long val;
2959
2960         /*
2961          * If we're using --switch-output-events, then we imply
2962          * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2963          * thread to its parent.
2964          */
2965         if (rec->switch_output_event_set) {
2966                 if (record__threads_enabled(rec)) {
2967                         pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2968                         return 0;
2969                 }
2970                 goto do_signal;
2971         }
2972
2973         if (!s->set)
2974                 return 0;
2975
2976         if (record__threads_enabled(rec)) {
2977                 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2978                 return 0;
2979         }
2980
2981         if (!strcmp(s->str, "signal")) {
2982 do_signal:
2983                 s->signal = true;
2984                 pr_debug("switch-output with SIGUSR2 signal\n");
2985                 goto enabled;
2986         }
2987
2988         val = parse_tag_value(s->str, tags_size);
2989         if (val != (unsigned long) -1) {
2990                 s->size = val;
2991                 pr_debug("switch-output with %s size threshold\n", s->str);
2992                 goto enabled;
2993         }
2994
2995         val = parse_tag_value(s->str, tags_time);
2996         if (val != (unsigned long) -1) {
2997                 s->time = val;
2998                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2999                          s->str, s->time);
3000                 goto enabled;
3001         }
3002
3003         return -1;
3004
3005 enabled:
3006         rec->timestamp_filename = true;
3007         s->enabled              = true;
3008
3009         if (s->size && !rec->opts.no_buffering)
3010                 switch_output_size_warn(rec);
3011
3012         return 0;
3013 }
3014
3015 static const char * const __record_usage[] = {
3016         "perf record [<options>] [<command>]",
3017         "perf record [<options>] -- <command> [<options>]",
3018         NULL
3019 };
3020 const char * const *record_usage = __record_usage;
3021
3022 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3023                                   struct perf_sample *sample, struct machine *machine)
3024 {
3025         /*
3026          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3027          * no need to add them twice.
3028          */
3029         if (!(event->header.misc & PERF_RECORD_MISC_USER))
3030                 return 0;
3031         return perf_event__process_mmap(tool, event, sample, machine);
3032 }
3033
3034 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3035                                    struct perf_sample *sample, struct machine *machine)
3036 {
3037         /*
3038          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3039          * no need to add them twice.
3040          */
3041         if (!(event->header.misc & PERF_RECORD_MISC_USER))
3042                 return 0;
3043
3044         return perf_event__process_mmap2(tool, event, sample, machine);
3045 }
3046
3047 static int process_timestamp_boundary(struct perf_tool *tool,
3048                                       union perf_event *event __maybe_unused,
3049                                       struct perf_sample *sample,
3050                                       struct machine *machine __maybe_unused)
3051 {
3052         struct record *rec = container_of(tool, struct record, tool);
3053
3054         set_timestamp_boundary(rec, sample->time);
3055         return 0;
3056 }
3057
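/*
 * Parse --synth, which fine-tunes which non-sample events get synthesized
 * (see the "no|all|task|mmap|cgroup" hint in the option table below).  The
 * string is decoded by parse_synth_opt() into the opts->synth mask, with a
 * negative result meaning an invalid spec.
 */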
3058 static int parse_record_synth_option(const struct option *opt,
3059                                      const char *str,
3060                                      int unset __maybe_unused)
3061 {
3062         struct record_opts *opts = opt->value;
3063         char *p = strdup(str);
3064
3065         if (p == NULL)
3066                 return -1;
3067
3068         opts->synth = parse_synth_opt(p);
3069         free(p);
3070
3071         if (opts->synth < 0) {
3072                 pr_err("Invalid synth option: %s\n", str);
3073                 return -1;
3074         }
3075         return 0;
3076 }
3077
3078 /*
3079  * XXX Ideally would be local to cmd_record() and passed to a record__new
3080  * because we need to have access to it in record__exit, which is called
3081  * after cmd_record() exits, but since record_options need to be accessible to
3082  * builtin-script, leave it here.
3083  *
3084  * At least we don't touch it in all the other functions here directly.
3085  *
3086  * Just say no to tons of global variables, sigh.
3087  */
3088 static struct record record = {
3089         .opts = {
3090                 .sample_time         = true,
3091                 .mmap_pages          = UINT_MAX,
3092                 .user_freq           = UINT_MAX,
3093                 .user_interval       = ULLONG_MAX,
3094                 .freq                = 4000,
3095                 .target              = {
3096                         .uses_mmap   = true,
3097                         .default_per_cpu = true,
3098                 },
3099                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
3100                 .nr_threads_synthesize = 1,
3101                 .ctl_fd              = -1,
3102                 .ctl_fd_ack          = -1,
3103                 .synth               = PERF_SYNTH_ALL,
3104         },
3105         .tool = {
3106                 .sample         = process_sample_event,
3107                 .fork           = perf_event__process_fork,
3108                 .exit           = perf_event__process_exit,
3109                 .comm           = perf_event__process_comm,
3110                 .namespaces     = perf_event__process_namespaces,
3111                 .mmap           = build_id__process_mmap,
3112                 .mmap2          = build_id__process_mmap2,
3113                 .itrace_start   = process_timestamp_boundary,
3114                 .aux            = process_timestamp_boundary,
3115                 .ordered_events = true,
3116         },
3117 };
3118
3119 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3120         "\n\t\t\t\tDefault: fp";
3121
3122 static bool dry_run;
3123
3124 /*
3125  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3126  * with it and switch to use the library functions in perf_evlist that came
3127  * from builtin-record.c, i.e. use record_opts,
3128  * evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
3129  * using pipes, etc.
3130  */
3131 static struct option __record_options[] = {
3132         OPT_CALLBACK('e', "event", &record.evlist, "event",
3133                      "event selector. use 'perf list' to list available events",
3134                      parse_events_option),
3135         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3136                      "event filter", parse_filter),
3137         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3138                            NULL, "don't record events from perf itself",
3139                            exclude_perf),
3140         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3141                     "record events on existing process id"),
3142         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3143                     "record events on existing thread id"),
3144         OPT_INTEGER('r', "realtime", &record.realtime_prio,
3145                     "collect data with this RT SCHED_FIFO priority"),
3146         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3147                     "collect data without buffering"),
3148         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3149                     "collect raw sample records from all opened counters"),
3150         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3151                             "system-wide collection from all CPUs"),
3152         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3153                     "list of cpus to monitor"),
3154         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3155         OPT_STRING('o', "output", &record.data.path, "file",
3156                     "output file name"),
3157         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3158                         &record.opts.no_inherit_set,
3159                         "child tasks do not inherit counters"),
3160         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3161                     "synthesize non-sample events at the end of output"),
3162         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3163         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3164         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3165                     "Fail if the specified frequency can't be used"),
3166         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3167                      "profile at this frequency",
3168                       record__parse_freq),
3169         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3170                      "number of mmap data pages and AUX area tracing mmap pages",
3171                      record__parse_mmap_pages),
3172         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3173                      "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3174                      record__mmap_flush_parse),
3175         OPT_BOOLEAN(0, "group", &record.opts.group,
3176                     "put the counters into a counter group"),
3177         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3178                            NULL, "enables call-graph recording",
3179                            &record_callchain_opt),
3180         OPT_CALLBACK(0, "call-graph", &record.opts,
3181                      "record_mode[,record_size]", record_callchain_help,
3182                      &record_parse_callchain_opt),
3183         OPT_INCR('v', "verbose", &verbose,
3184                     "be more verbose (show counter open errors, etc)"),
3185         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3186         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3187                     "per thread counts"),
3188         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3189         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3190                     "Record the sample physical addresses"),
3191         OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3192                     "Record the sampled data address data page size"),
3193         OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3194                     "Record the sampled code address (ip) page size"),
3195         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3196         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3197                         &record.opts.sample_time_set,
3198                         "Record the sample timestamps"),
3199         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3200                         "Record the sample period"),
3201         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3202                     "don't sample"),
3203         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3204                         &record.no_buildid_cache_set,
3205                         "do not update the buildid cache"),
3206         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3207                         &record.no_buildid_set,
3208                         "do not collect buildids in perf.data"),
3209         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3210                      "monitor event in cgroup name only",
3211                      parse_cgroups),
3212         OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3213                   "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3214         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3215         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3216                    "user to profile"),
3217
3218         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3219                      "branch any", "sample any taken branches",
3220                      parse_branch_stack),
3221
3222         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3223                      "branch filter mask", "branch stack filter modes",
3224                      parse_branch_stack),
3225         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3226                     "sample by weight (on special events only)"),
3227         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3228                     "sample transaction flags (special events only)"),
3229         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3230                     "use per-thread mmaps"),
3231         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3232                     "sample selected machine registers on interrupt,"
3233                     " use '-I?' to list register names", parse_intr_regs),
3234         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3235                     "sample selected machine registers on interrupt,"
3236                     " use '--user-regs=?' to list register names", parse_user_regs),
3237         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3238                     "Record running/enabled time of read (:S) events"),
3239         OPT_CALLBACK('k', "clockid", &record.opts,
3240                      "clockid", "clockid to use for events, see clock_gettime()",
3241                      parse_clockid),
3242         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3243                           "opts", "AUX area tracing Snapshot Mode", ""),
3244         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3245                           "opts", "sample AUX area", ""),
3246         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3247                         "per thread proc mmap processing timeout in ms"),
3248         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3249                     "Record namespaces events"),
3250         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3251                     "Record cgroup events"),
3252         OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3253                         &record.opts.record_switch_events_set,
3254                         "Record context switch events"),
3255         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3256                          "Configure all used events to run in kernel space.",
3257                          PARSE_OPT_EXCLUSIVE),
3258         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3259                          "Configure all used events to run in user space.",
3260                          PARSE_OPT_EXCLUSIVE),
3261         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3262                     "collect kernel callchains"),
3263         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3264                     "collect user callchains"),
3265         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3266                    "clang binary to use for compiling BPF scriptlets"),
3267         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3268                    "options passed to clang when compiling BPF scriptlets"),
3269         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3270                    "file", "vmlinux pathname"),
3271         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3272                     "Record build-id of all DSOs regardless of hits"),
3273         OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3274                     "Record build-id in map events"),
3275         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3276                     "append timestamp to output filename"),
3277         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3278                     "Record timestamp boundary (time of first/last samples)"),
3279         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3280                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3281                           "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3282                           "signal"),
3283         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3284                          "switch output event selector. use 'perf list' to list available events",
3285                          parse_events_option_new_evlist),
3286         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3287                    "Limit number of switch output generated files"),
3288         OPT_BOOLEAN(0, "dry-run", &dry_run,
3289                     "Parse options then exit"),
3290 #ifdef HAVE_AIO_SUPPORT
3291         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3292                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3293                      record__aio_parse),
3294 #endif
3295         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3296                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3297                      record__parse_affinity),
3298 #ifdef HAVE_ZSTD_SUPPORT
3299         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3300                             "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3301                             record__parse_comp_level),
3302 #endif
3303         OPT_CALLBACK(0, "max-size", &record.output_max_size,
3304                      "size", "Limit the maximum size of the output file", parse_output_max_size),
3305         OPT_UINTEGER(0, "num-thread-synthesize",
3306                      &record.opts.nr_threads_synthesize,
3307                      "number of threads to run for event synthesis"),
3308 #ifdef HAVE_LIBPFM
3309         OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3310                 "libpfm4 event selector. use 'perf list' to list available events",
3311                 parse_libpfm_events_option),
3312 #endif
3313         OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3314                      "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3315                      "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3316                      "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3317                      "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3318                       parse_control_option),
3319         OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3320                      "Fine-tune event synthesis: default=all", parse_record_synth_option),
3321         OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3322                           &record.debuginfod.set, "debuginfod urls",
3323                           "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3324                           "system"),
3325         OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3326                             "write collected trace data into several data files using parallel threads",
3327                             record__parse_threads),
3328         OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3329         OPT_END()
3330 };
3331
3332 struct option *record_options = __record_options;
3333
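/*
 * The helpers below build the per-thread CPU masks used by the parallel
 * (--threads) streaming mode: the "maps" mask selects which mmap buffers a
 * streaming thread reads, the "affinity" mask where that thread is allowed
 * to run.
 */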
3334 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3335 {
3336         struct perf_cpu cpu;
3337         int idx;
3338
3339         if (cpu_map__is_dummy(cpus))
3340                 return;
3341
3342         perf_cpu_map__for_each_cpu(cpu, idx, cpus)
3343                 set_bit(cpu.cpu, mask->bits);
3344 }
3345
3346 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3347 {
3348         struct perf_cpu_map *cpus;
3349
3350         cpus = perf_cpu_map__new(mask_spec);
3351         if (!cpus)
3352                 return -ENOMEM;
3353
3354         bitmap_zero(mask->bits, mask->nbits);
3355         record__mmap_cpu_mask_init(mask, cpus);
3356         perf_cpu_map__put(cpus);
3357
3358         return 0;
3359 }
3360
3361 static void record__free_thread_masks(struct record *rec, int nr_threads)
3362 {
3363         int t;
3364
3365         if (rec->thread_masks)
3366                 for (t = 0; t < nr_threads; t++)
3367                         record__thread_mask_free(&rec->thread_masks[t]);
3368
3369         zfree(&rec->thread_masks);
3370 }
3371
3372 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3373 {
3374         int t, ret;
3375
3376         rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3377         if (!rec->thread_masks) {
3378                 pr_err("Failed to allocate thread masks\n");
3379                 return -ENOMEM;
3380         }
3381
3382         for (t = 0; t < nr_threads; t++) {
3383                 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3384                 if (ret) {
3385                         pr_err("Failed to allocate thread masks[%d]\n", t);
3386                         goto out_free;
3387                 }
3388         }
3389
3390         return 0;
3391
3392 out_free:
3393         record__free_thread_masks(rec, nr_threads);
3394
3395         return ret;
3396 }
3397
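/*
 * Default parallel layout (--threads or --threads=cpu): one streaming thread
 * per monitored CPU, with both the maps and the affinity mask of thread t
 * holding just that CPU.
 */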
3398 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3399 {
3400         int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3401
3402         ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3403         if (ret)
3404                 return ret;
3405
3406         rec->nr_threads = nr_cpus;
3407         pr_debug("nr_threads: %d\n", rec->nr_threads);
3408
3409         for (t = 0; t < rec->nr_threads; t++) {
3410                 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3411                 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3412                 if (verbose) {
3413                         pr_debug("thread_masks[%d]: ", t);
3414                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3415                         pr_debug("thread_masks[%d]: ", t);
3416                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3417                 }
3418         }
3419
3420         return 0;
3421 }
3422
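/*
 * Build thread masks from parallel arrays of maps/affinity CPU list strings,
 * one pair per streaming thread.  CPUs outside the monitored set are
 * silently dropped, but a spec that ends up empty or that intersects a
 * previously accepted one is rejected.
 */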
3423 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3424                                           const char **maps_spec, const char **affinity_spec,
3425                                           u32 nr_spec)
3426 {
3427         u32 s;
3428         int ret = 0, t = 0;
3429         struct mmap_cpu_mask cpus_mask;
3430         struct thread_mask thread_mask, full_mask, *thread_masks;
3431
3432         ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3433         if (ret) {
3434                 pr_err("Failed to allocate CPUs mask\n");
3435                 return ret;
3436         }
3437         record__mmap_cpu_mask_init(&cpus_mask, cpus);
3438
3439         ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3440         if (ret) {
3441                 pr_err("Failed to allocate full mask\n");
3442                 goto out_free_cpu_mask;
3443         }
3444
3445         ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3446         if (ret) {
3447                 pr_err("Failed to allocate thread mask\n");
3448                 goto out_free_full_and_cpu_masks;
3449         }
3450
3451         for (s = 0; s < nr_spec; s++) {
3452                 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3453                 if (ret) {
3454                         pr_err("Failed to initialize maps thread mask\n");
3455                         goto out_free;
3456                 }
3457                 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3458                 if (ret) {
3459                         pr_err("Failed to initialize affinity thread mask\n");
3460                         goto out_free;
3461                 }
3462
3463                 /* ignore invalid CPUs but do not allow empty masks */
3464                 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3465                                 cpus_mask.bits, thread_mask.maps.nbits)) {
3466                         pr_err("Empty maps mask: %s\n", maps_spec[s]);
3467                         ret = -EINVAL;
3468                         goto out_free;
3469                 }
3470                 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3471                                 cpus_mask.bits, thread_mask.affinity.nbits)) {
3472                         pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3473                         ret = -EINVAL;
3474                         goto out_free;
3475                 }
3476
3477                 /* do not allow intersection with other masks (full_mask) */
3478                 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3479                                       thread_mask.maps.nbits)) {
3480                         pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3481                         ret = -EINVAL;
3482                         goto out_free;
3483                 }
3484                 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3485                                       thread_mask.affinity.nbits)) {
3486                         pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3487                         ret = -EINVAL;
3488                         goto out_free;
3489                 }
3490
3491                 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3492                           thread_mask.maps.bits, full_mask.maps.nbits);
3493                 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3494                           thread_mask.affinity.bits, full_mask.affinity.nbits);
3495
3496                 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3497                 if (!thread_masks) {
3498                         pr_err("Failed to reallocate thread masks\n");
3499                         ret = -ENOMEM;
3500                         goto out_free;
3501                 }
3502                 rec->thread_masks = thread_masks;
3503                 rec->thread_masks[t] = thread_mask;
3504                 if (verbose) {
3505                         pr_debug("thread_masks[%d]: ", t);
3506                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3507                         pr_debug("thread_masks[%d]: ", t);
3508                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3509                 }
3510                 t++;
3511                 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3512                 if (ret) {
3513                         pr_err("Failed to allocate thread mask\n");
3514                         goto out_free_full_and_cpu_masks;
3515                 }
3516         }
3517         rec->nr_threads = t;
3518         pr_debug("nr_threads: %d\n", rec->nr_threads);
3519         if (!rec->nr_threads)
3520                 ret = -EINVAL;
3521
3522 out_free:
3523         record__thread_mask_free(&thread_mask);
3524 out_free_full_and_cpu_masks:
3525         record__thread_mask_free(&full_mask);
3526 out_free_cpu_mask:
3527         record__mmap_cpu_mask_free(&cpus_mask);
3528
3529         return ret;
3530 }
3531
3532 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3533 {
3534         int ret;
3535         struct cpu_topology *topo;
3536
3537         topo = cpu_topology__new();
3538         if (!topo) {
3539                 pr_err("Failed to allocate CPU topology\n");
3540                 return -ENOMEM;
3541         }
3542
3543         ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3544                                              topo->core_cpus_list, topo->core_cpus_lists);
3545         cpu_topology__delete(topo);
3546
3547         return ret;
3548 }
3549
3550 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3551 {
3552         int ret;
3553         struct cpu_topology *topo;
3554
3555         topo = cpu_topology__new();
3556         if (!topo) {
3557                 pr_err("Failed to allocate CPU topology\n");
3558                 return -ENOMEM;
3559         }
3560
3561         ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3562                                              topo->package_cpus_list, topo->package_cpus_lists);
3563         cpu_topology__delete(topo);
3564
3565         return ret;
3566 }
3567
3568 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3569 {
3570         u32 s;
3571         int ret;
3572         const char **spec;
3573         struct numa_topology *topo;
3574
3575         topo = numa_topology__new();
3576         if (!topo) {
3577                 pr_err("Failed to allocate NUMA topology\n");
3578                 return -ENOMEM;
3579         }
3580
3581         spec = zalloc(topo->nr * sizeof(char *));
3582         if (!spec) {
3583                 pr_err("Failed to allocate NUMA spec\n");
3584                 ret = -ENOMEM;
3585                 goto out_delete_topo;
3586         }
3587         for (s = 0; s < topo->nr; s++)
3588                 spec[s] = topo->nodes[s].cpus;
3589
3590         ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3591
3592         zfree(&spec);
3593
3594 out_delete_topo:
3595         numa_topology__delete(topo);
3596
3597         return ret;
3598 }
3599
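/*
 * Split a user --threads spec into maps/affinity string pairs and hand them
 * to record__init_thread_masks_spec().  The syntax accepted by the parsing
 * below is "<maps>/<affinity>" entries separated by ':', so, for example,
 * "--threads=0-3/0:4-7/4" should create two streaming threads reading the
 * buffers of CPUs 0-3 and 4-7 while pinned to CPU 0 and CPU 4 respectively.
 */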
3600 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3601 {
3602         int t, ret;
3603         u32 s, nr_spec = 0;
3604         char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3605         char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3606
3607         for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3608                 spec = strtok_r(user_spec, ":", &spec_ptr);
3609                 if (spec == NULL)
3610                         break;
3611                 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3612                 mask = strtok_r(spec, "/", &mask_ptr);
3613                 if (mask == NULL)
3614                         break;
3615                 pr_debug2("  maps mask: %s\n", mask);
3616                 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3617                 if (!tmp_spec) {
3618                         pr_err("Failed to reallocate maps spec\n");
3619                         ret = -ENOMEM;
3620                         goto out_free;
3621                 }
3622                 maps_spec = tmp_spec;
3623                 maps_spec[nr_spec] = dup_mask = strdup(mask);
3624                 if (!maps_spec[nr_spec]) {
3625                         pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3626                         ret = -ENOMEM;
3627                         goto out_free;
3628                 }
3629                 mask = strtok_r(NULL, "/", &mask_ptr);
3630                 if (mask == NULL) {
3631                         pr_err("Invalid thread maps or affinity specs\n");
3632                         ret = -EINVAL;
3633                         goto out_free;
3634                 }
3635                 pr_debug2("  affinity mask: %s\n", mask);
3636                 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3637                 if (!tmp_spec) {
3638                         pr_err("Failed to reallocate affinity spec\n");
3639                         ret = -ENOMEM;
3640                         goto out_free;
3641                 }
3642                 affinity_spec = tmp_spec;
3643                 affinity_spec[nr_spec] = strdup(mask);
3644                 if (!affinity_spec[nr_spec]) {
3645                         pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3646                         ret = -ENOMEM;
3647                         goto out_free;
3648                 }
3649                 dup_mask = NULL;
3650                 nr_spec++;
3651         }
3652
3653         ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3654                                              (const char **)affinity_spec, nr_spec);
3655
3656 out_free:
3657         free(dup_mask);
3658         for (s = 0; s < nr_spec; s++) {
3659                 if (maps_spec)
3660                         free(maps_spec[s]);
3661                 if (affinity_spec)
3662                         free(affinity_spec[s]);
3663         }
3664         free(affinity_spec);
3665         free(maps_spec);
3666
3667         return ret;
3668 }
3669
3670 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3671 {
3672         int ret;
3673
3674         ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3675         if (ret)
3676                 return ret;
3677
3678         record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3679
3680         rec->nr_threads = 1;
3681
3682         return 0;
3683 }
3684
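/*
 * Pick the thread-mask layout: a single mask covering all monitored CPUs in
 * the default single-threaded mode, otherwise one of the cpu/core/package/
 * numa/user layouts selected by --threads (which is incompatible with
 * --per-thread mmaps).
 */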
3685 static int record__init_thread_masks(struct record *rec)
3686 {
3687         int ret = 0;
3688         struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3689
3690         if (!record__threads_enabled(rec))
3691                 return record__init_thread_default_masks(rec, cpus);
3692
3693         if (evlist__per_thread(rec->evlist)) {
3694                 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3695                 return -EINVAL;
3696         }
3697
3698         switch (rec->opts.threads_spec) {
3699         case THREAD_SPEC__CPU:
3700                 ret = record__init_thread_cpu_masks(rec, cpus);
3701                 break;
3702         case THREAD_SPEC__CORE:
3703                 ret = record__init_thread_core_masks(rec, cpus);
3704                 break;
3705         case THREAD_SPEC__PACKAGE:
3706                 ret = record__init_thread_package_masks(rec, cpus);
3707                 break;
3708         case THREAD_SPEC__NUMA:
3709                 ret = record__init_thread_numa_masks(rec, cpus);
3710                 break;
3711         case THREAD_SPEC__USER:
3712                 ret = record__init_thread_user_masks(rec, cpus);
3713                 break;
3714         default:
3715                 break;
3716         }
3717
3718         return ret;
3719 }
3720
3721 int cmd_record(int argc, const char **argv)
3722 {
3723         int err;
3724         struct record *rec = &record;
3725         char errbuf[BUFSIZ];
3726
3727         setlocale(LC_ALL, "");
3728
3729 #ifndef HAVE_LIBBPF_SUPPORT
3730 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3731         set_nobuild('\0', "clang-path", true);
3732         set_nobuild('\0', "clang-opt", true);
3733 # undef set_nobuild
3734 #endif
3735
3736 #ifndef HAVE_BPF_PROLOGUE
3737 # if !defined (HAVE_DWARF_SUPPORT)
3738 #  define REASON  "NO_DWARF=1"
3739 # elif !defined (HAVE_LIBBPF_SUPPORT)
3740 #  define REASON  "NO_LIBBPF=1"
3741 # else
3742 #  define REASON  "this architecture doesn't support BPF prologue"
3743 # endif
3744 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3745         set_nobuild('\0', "vmlinux", true);
3746 # undef set_nobuild
3747 # undef REASON
3748 #endif
3749
3750 #ifndef HAVE_BPF_SKEL
3751 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3752         set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3753 # undef set_nobuild
3754 #endif
3755
3756         rec->opts.affinity = PERF_AFFINITY_SYS;
3757
3758         rec->evlist = evlist__new();
3759         if (rec->evlist == NULL)
3760                 return -ENOMEM;
3761
3762         err = perf_config(perf_record_config, rec);
3763         if (err)
3764                 return err;
3765
3766         argc = parse_options(argc, argv, record_options, record_usage,
3767                             PARSE_OPT_STOP_AT_NON_OPTION);
3768         if (quiet)
3769                 perf_quiet_option();
3770
3771         err = symbol__validate_sym_arguments();
3772         if (err)
3773                 return err;
3774
3775         perf_debuginfod_setup(&record.debuginfod);
3776
3777         /* Make system wide (-a) the default target. */
3778         if (!argc && target__none(&rec->opts.target))
3779                 rec->opts.target.system_wide = true;
3780
3781         if (nr_cgroups && !rec->opts.target.system_wide) {
3782                 usage_with_options_msg(record_usage, record_options,
3783                         "cgroup monitoring only available in system-wide mode");
3785         }
3786
3787         if (rec->buildid_mmap) {
3788                 if (!perf_can_record_build_id()) {
3789                         pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3790                         err = -EINVAL;
3791                         goto out_opts;
3792                 }
3793                 pr_debug("Enabling build id in mmap2 events.\n");
3794                 /* Enable mmap build id synthesizing. */
3795                 symbol_conf.buildid_mmap2 = true;
3796                 /* Enable perf_event_attr::build_id bit. */
3797                 rec->opts.build_id = true;
3798                 /* Disable build id cache. */
3799                 rec->no_buildid = true;
3800         }
3801
3802         if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3803                 pr_err("Kernel has no cgroup sampling support.\n");
3804                 err = -EINVAL;
3805                 goto out_opts;
3806         }
3807
3808         if (rec->opts.kcore || record__threads_enabled(rec))
3809                 rec->data.is_dir = true;
3810
3811         if (record__threads_enabled(rec)) {
3812                 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3813                         pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
                        err = -EINVAL;
3814                         goto out_opts;
3815                 }
3816                 if (record__aio_enabled(rec)) {
3817                         pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
                        err = -EINVAL;
3818                         goto out_opts;
3819                 }
3820         }
3821
3822         if (rec->opts.comp_level != 0) {
3823                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3824                 rec->no_buildid = true;
3825         }
3826
3827         if (rec->opts.record_switch_events &&
3828             !perf_can_record_switch_events()) {
3829                 ui__error("kernel does not support recording context switch events\n");
3830                 parse_options_usage(record_usage, record_options, "switch-events", 0);
3831                 err = -EINVAL;
3832                 goto out_opts;
3833         }
3834
3835         if (switch_output_setup(rec)) {
3836                 parse_options_usage(record_usage, record_options, "switch-output", 0);
3837                 err = -EINVAL;
3838                 goto out_opts;
3839         }
3840
3841         if (rec->switch_output.time) {
3842                 signal(SIGALRM, alarm_sig_handler);
3843                 alarm(rec->switch_output.time);
3844         }
3845
3846         if (rec->switch_output.num_files) {
3847                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
3848                                                       sizeof(char *));
3849                 if (!rec->switch_output.filenames) {
3850                         err = -ENOMEM;
3851                         goto out_opts;
3852                 }
3853         }
3854
3855         if (rec->timestamp_filename && record__threads_enabled(rec)) {
3856                 rec->timestamp_filename = false;
3857                 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3858         }
3859
3860         /*
3861          * Allow aliases to facilitate the lookup of symbols for address
3862          * filters. Refer to auxtrace_parse_filters().
3863          */
3864         symbol_conf.allow_aliases = true;
3865
3866         symbol__init(NULL);
3867
3868         err = record__auxtrace_init(rec);
3869         if (err)
3870                 goto out;
3871
3872         if (dry_run)
3873                 goto out;
3874
3875         err = bpf__setup_stdout(rec->evlist);
3876         if (err) {
3877                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3878                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
3879                          errbuf);
3880                 goto out;
3881         }
3882
3883         err = -ENOMEM;
3884
3885         if (rec->no_buildid_cache || rec->no_buildid) {
3886                 disable_buildid_cache();
3887         } else if (rec->switch_output.enabled) {
3888                 /*
3889                  * In 'perf record --switch-output', disable buildid
3890                  * generation by default to reduce data file switching
3891                  * overhead. Still generate buildids if they are required
3892                  * explicitly using
3893                  *
3894                  *  perf record --switch-output --no-no-buildid \
3895                  *              --no-no-buildid-cache
3896                  *
3897                  * Following code equals to:
3898                  *
3899                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
3900                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3901                  *         disable_buildid_cache();
3902                  */
3903                 bool disable = true;
3904
3905                 if (rec->no_buildid_set && !rec->no_buildid)
3906                         disable = false;
3907                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3908                         disable = false;
3909                 if (disable) {
3910                         rec->no_buildid = true;
3911                         rec->no_buildid_cache = true;
3912                         disable_buildid_cache();
3913                 }
3914         }
3915
3916         if (record.opts.overwrite)
3917                 record.opts.tail_synthesize = true;
3918
3919         if (rec->evlist->core.nr_entries == 0) {
3920                 if (perf_pmu__has_hybrid()) {
3921                         err = evlist__add_default_hybrid(rec->evlist,
3922                                                          !record.opts.no_samples);
3923                 } else {
3924                         err = __evlist__add_default(rec->evlist,
3925                                                     !record.opts.no_samples);
3926                 }
3927
3928                 if (err < 0) {
3929                         pr_err("Not enough memory for event selector list\n");
3930                         goto out;
3931                 }
3932         }
3933
3934         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3935                 rec->opts.no_inherit = true;
3936
3937         err = target__validate(&rec->opts.target);
3938         if (err) {
3939                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3940                 ui__warning("%s\n", errbuf);
3941         }
3942
3943         err = target__parse_uid(&rec->opts.target);
3944         if (err) {
3945                 int saved_errno = errno;
3946
3947                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3948                 ui__error("%s", errbuf);
3949
3950                 err = -saved_errno;
3951                 goto out;
3952         }
3953
3954         /* Enable ignoring missing threads when -u/-p option is defined. */
3955         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3956
3957         if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3958                 pr_err("failed to use cpu list %s\n",
3959                        rec->opts.target.cpu_list);
3960                 goto out;
3961         }
3962
3963         rec->opts.target.hybrid = perf_pmu__has_hybrid();
3964
3965         if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3966                 arch__add_leaf_frame_record_opts(&rec->opts);
3967
3968         err = -ENOMEM;
3969         if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3970                 usage_with_options(record_usage, record_options);
3971
3972         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3973         if (err)
3974                 goto out;
3975
3976         /*
3977          * We take all buildids when the file contains
3978          * AUX area tracing data, because we do not decode the
3979          * trace, which would take too long.
3980          */
3981         if (rec->opts.full_auxtrace)
3982                 rec->buildid_all = true;
3983
3984         if (rec->opts.text_poke) {
3985                 err = record__config_text_poke(rec->evlist);
3986                 if (err) {
3987                         pr_err("record__config_text_poke failed, error %d\n", err);
3988                         goto out;
3989                 }
3990         }
3991
3992         if (rec->off_cpu) {
3993                 err = record__config_off_cpu(rec);
3994                 if (err) {
3995                         pr_err("record__config_off_cpu failed, error %d\n", err);
3996                         goto out;
3997                 }
3998         }
3999
4000         if (record_opts__config(&rec->opts)) {
4001                 err = -EINVAL;
4002                 goto out;
4003         }
4004
4005         err = record__init_thread_masks(rec);
4006         if (err) {
4007                 pr_err("Failed to initialize parallel data streaming masks\n");
4008                 goto out;
4009         }
4010
4011         if (rec->opts.nr_cblocks > nr_cblocks_max)
4012                 rec->opts.nr_cblocks = nr_cblocks_max;
4013         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4014
4015         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4016         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4017
4018         if (rec->opts.comp_level > comp_level_max)
4019                 rec->opts.comp_level = comp_level_max;
4020         pr_debug("comp level: %d\n", rec->opts.comp_level);
4021
4022         err = __cmd_record(&record, argc, argv);
4023 out:
4024         evlist__delete(rec->evlist);
4025         symbol__exit();
4026         auxtrace_record__free(rec->itr);
4027 out_opts:
4028         record__free_thread_masks(rec, rec->nr_threads);
4029         rec->nr_threads = 0;
4030         evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4031         return err;
4032 }
4033
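/*
 * Signal handlers armed by __cmd_record(): snapshot_sig_handler() is
 * presumably wired to SIGUSR2 and takes an AUX area snapshot and/or triggers
 * a --switch-output=signal rotation, while alarm_sig_handler() services the
 * SIGALRM set up for the --switch-output time threshold (see cmd_record()
 * above).
 */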
4034 static void snapshot_sig_handler(int sig __maybe_unused)
4035 {
4036         struct record *rec = &record;
4037
4038         hit_auxtrace_snapshot_trigger(rec);
4039
4040         if (switch_output_signal(rec))
4041                 trigger_hit(&switch_output_trigger);
4042 }
4043
4044 static void alarm_sig_handler(int sig __maybe_unused)
4045 {
4046         struct record *rec = &record;
4047
4048         if (switch_output_time(rec))
4049                 trigger_hit(&switch_output_trigger);
4050 }