GNU Linux-libre 4.14.313-gnu1
[releases.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ./hackbench 10
10
11   Time: 0.118
12
13   Performance counter stats for './hackbench 10':
14
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26
27         0.154822978  seconds time elapsed
28
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/cgroup.h"
47 #include "util/util.h"
48 #include <subcmd/parse-options.h>
49 #include "util/parse-events.h"
50 #include "util/pmu.h"
51 #include "util/event.h"
52 #include "util/evlist.h"
53 #include "util/evsel.h"
54 #include "util/debug.h"
55 #include "util/drv_configs.h"
56 #include "util/color.h"
57 #include "util/stat.h"
58 #include "util/header.h"
59 #include "util/cpumap.h"
60 #include "util/thread.h"
61 #include "util/thread_map.h"
62 #include "util/counts.h"
63 #include "util/group.h"
64 #include "util/session.h"
65 #include "util/tool.h"
66 #include "util/group.h"
67 #include "util/string2.h"
68 #include "asm/bug.h"
69
70 #include <linux/time64.h>
71 #include <api/fs/fs.h>
72 #include <errno.h>
73 #include <signal.h>
74 #include <stdlib.h>
75 #include <sys/prctl.h>
76 #include <inttypes.h>
77 #include <locale.h>
78 #include <math.h>
79 #include <sys/types.h>
80 #include <sys/stat.h>
81 #include <sys/wait.h>
82 #include <unistd.h>
83
84 #include "sane_ctype.h"
85
86 #define DEFAULT_SEPARATOR       " "
87 #define CNTR_NOT_SUPPORTED      "<not supported>"
88 #define CNTR_NOT_COUNTED        "<not counted>"
89 #define FREEZE_ON_SMI_PATH      "devices/cpu/freeze_on_smi"
90
/* Defined later in this file; forward-declared for use by process_interval(). */
static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char * transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

/* Event names for top-down metrics mode (topdown_run); NULL-terminated. */
static const char * topdown_attrs[] = {
	"topdown-total-slots",
	"topdown-slots-retired",
	"topdown-recovery-bubbles",
	"topdown-fetch-bubbles",
	"topdown-slots-issued",
	NULL,
};

/* Event group used when SMI cost measurement (smi_cost) is enabled. */
static const char *smi_cost_attrs = {
	"{"
	"msr/aperf/,"
	"msr/smi/,"
	"cycles"
	"}"
};
133
/* The list of events being counted; shared by most functions in this file. */
static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid	= UINT_MAX,
};

/* Returns the aggregation id (e.g. socket or core id) for a given cpu. */
typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

/*
 * Command-line controlled state.  Option parsing is elsewhere in this
 * file (not visible in this chunk); defaults are set here.
 */
static int			run_count			=  1;
static bool			no_inherit			= false;
static volatile pid_t		child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			transaction_run;
static bool			topdown_run			= false;
static bool			smi_cost			= false;
static bool			smi_reset			= false;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		initial_delay			= 0;
static unsigned int		unit_width			= 4; /* strlen("unit") */
static bool			forever				= false;
static bool			metric_only			= false;
static bool			force_metric_only		= false;
static bool			no_merge			= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static aggr_get_id_t		aggr_get_id;
static bool			append_file;
static const char		*output_name;
static int			output_fd;
static int			print_free_counters_hint;

/* State for "perf stat record": output file, session, and recorded maps. */
struct perf_stat {
	bool			 record;	/* true when recording to a perf.data file */
	struct perf_data_file	 file;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

/* When set non-zero, the counting loop in __run_perf_stat() exits. */
static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};
194
195 static inline void diff_timespec(struct timespec *r, struct timespec *a,
196                                  struct timespec *b)
197 {
198         r->tv_sec = a->tv_sec - b->tv_sec;
199         if (a->tv_nsec < b->tv_nsec) {
200                 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
201                 r->tv_sec--;
202         } else {
203                 r->tv_nsec = a->tv_nsec - b->tv_nsec ;
204         }
205 }
206
/* Reset per-event aggregate stats and the shadow-metric state before a run. */
static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}
212
/*
 * Configure and open one event counter.
 *
 * Sets up the perf_event_attr for counting (as opposed to sampling),
 * then opens it per-CPU when a CPU target was given, otherwise
 * per-thread.  Returns the open helper's result (< 0 on error, with
 * errno set by the failed syscall).
 */
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;
	struct perf_evsel *leader = evsel->leader;

	/* Request enabled/running times so counts can be scaled. */
	if (stat_config.scale) {
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/*
	 * The event is part of a non trivial group, let's enable
	 * the group read (for leader) and ID retrieval for all
	 * members.
	 */
	if (leader->nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	/*
	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
	 * while avoiding that older tools show confusing messages.
	 *
	 * However for pipe sessions we need to keep it zero,
	 * because script's perf_evsel__check_attr is triggered
	 * by attr->sample_type != 0, and we can't run it on
	 * stat sessions.
	 */
	if (!(STAT_RECORD && perf_stat.file.is_pipe))
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(&target) && !initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
272
273 /*
274  * Does the counter have nsecs as a unit?
275  */
276 static inline int nsec_counter(struct perf_evsel *evsel)
277 {
278         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
279             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
280                 return 1;
281
282         return 0;
283 }
284
285 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
286                                      union perf_event *event,
287                                      struct perf_sample *sample __maybe_unused,
288                                      struct machine *machine __maybe_unused)
289 {
290         if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
291                 pr_err("failed to write perf data, error: %m\n");
292                 return -1;
293         }
294
295         perf_stat.bytes_written += event->header.size;
296         return 0;
297 }
298
/*
 * Synthesize and record a stat-round event carrying the elapsed time
 * @tm and round @type; written via process_synthesized_event().
 */
static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

/* Convenience wrapper expanding to the PERF_STAT_ROUND_TYPE__* constant. */
#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)

/* Sample-id entry of evsel @e for (cpu @x, thread @y). */
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
310
/*
 * Record one (cpu, thread) counter value for @counter as a stat event,
 * tagged with the event's sample id so readers can match it back.
 */
static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}
320
/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode.
 *
 * Returns 0 on success, -ENOENT for an unsupported counter, -1 on a
 * read or stat-event write failure (the counter is marked unscaled).
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus, cpu, thread;

	/* Only iterate per-CPU when counting a CPU target. */
	if (target__has_cpu(&target))
		ncpus = perf_evsel__nr_cpus(counter);
	else
		ncpus = 1;

	if (!counter->supported)
		return -ENOENT;

	/* A system-wide counter has a single value regardless of threads. */
	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);

			/*
			 * The leader's group read loads data into its group members
			 * (via perf_evsel__read_counter) and sets their count->loaded.
			 */
			if (!count->loaded &&
			    perf_evsel__read_counter(counter, cpu, thread)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			/* Consume the loaded flag so the next round re-reads. */
			count->loaded = false;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
						perf_evsel__name(counter),
						cpu,
						count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}
380
381 static void read_counters(void)
382 {
383         struct perf_evsel *counter;
384         int ret;
385
386         evlist__for_each_entry(evsel_list, counter) {
387                 ret = read_counter(counter);
388                 if (ret)
389                         pr_debug("failed to read counter %s\n", counter->name);
390
391                 if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
392                         pr_warning("failed to process counter %s\n", counter->name);
393         }
394 }
395
396 static void process_interval(void)
397 {
398         struct timespec ts, rs;
399
400         read_counters();
401
402         clock_gettime(CLOCK_MONOTONIC, &ts);
403         diff_timespec(&rs, &ts, &ref_time);
404
405         if (STAT_RECORD) {
406                 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
407                         pr_err("failed to write stat round event\n");
408         }
409
410         print_counters(&rs, 0, NULL);
411 }
412
413 static void enable_counters(void)
414 {
415         if (initial_delay)
416                 usleep(initial_delay * USEC_PER_MSEC);
417
418         /*
419          * We need to enable counters only if:
420          * - we don't have tracee (attaching to task or cpu)
421          * - we have initial delay configured
422          */
423         if (!target__none(&target) || initial_delay)
424                 perf_evlist__enable(evsel_list);
425 }
426
427 static void disable_counters(void)
428 {
429         /*
430          * If we don't have tracee (attaching to task or cpu), counters may
431          * still be running. To get accurate group ratios, we must stop groups
432          * from counting before reading their constituent counters.
433          */
434         if (!target__none(&target))
435                 perf_evlist__disable(evsel_list);
436 }
437
/* errno of a failed workload exec, delivered via SIGUSR1; 0 if none. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The failing errno is carried in the signal's value payload. */
	workload_exec_errno = info->si_value.sival_int;
}
450
451 static bool has_unit(struct perf_evsel *counter)
452 {
453         return counter->unit && *counter->unit;
454 }
455
456 static bool has_scale(struct perf_evsel *counter)
457 {
458         return counter->scale != 1;
459 }
460
461 static int perf_stat_synthesize_config(bool is_pipe)
462 {
463         struct perf_evsel *counter;
464         int err;
465
466         if (is_pipe) {
467                 err = perf_event__synthesize_attrs(NULL, perf_stat.session,
468                                                    process_synthesized_event);
469                 if (err < 0) {
470                         pr_err("Couldn't synthesize attrs.\n");
471                         return err;
472                 }
473         }
474
475         /*
476          * Synthesize other events stuff not carried within
477          * attr event - unit, scale, name
478          */
479         evlist__for_each_entry(evsel_list, counter) {
480                 if (!counter->supported)
481                         continue;
482
483                 /*
484                  * Synthesize unit and scale only if it's defined.
485                  */
486                 if (has_unit(counter)) {
487                         err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
488                         if (err < 0) {
489                                 pr_err("Couldn't synthesize evsel unit.\n");
490                                 return err;
491                         }
492                 }
493
494                 if (has_scale(counter)) {
495                         err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
496                         if (err < 0) {
497                                 pr_err("Couldn't synthesize evsel scale.\n");
498                                 return err;
499                         }
500                 }
501
502                 if (counter->own_cpus) {
503                         err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
504                         if (err < 0) {
505                                 pr_err("Couldn't synthesize evsel scale.\n");
506                                 return err;
507                         }
508                 }
509
510                 /*
511                  * Name is needed only for pipe output,
512                  * perf.data carries event names.
513                  */
514                 if (is_pipe) {
515                         err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
516                         if (err < 0) {
517                                 pr_err("Couldn't synthesize evsel name.\n");
518                                 return err;
519                         }
520                 }
521         }
522
523         err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
524                                                 process_synthesized_event,
525                                                 NULL);
526         if (err < 0) {
527                 pr_err("Couldn't synthesize thread map.\n");
528                 return err;
529         }
530
531         err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
532                                              process_synthesized_event, NULL);
533         if (err < 0) {
534                 pr_err("Couldn't synthesize thread map.\n");
535                 return err;
536         }
537
538         err = perf_event__synthesize_stat_config(NULL, &stat_config,
539                                                  process_synthesized_event, NULL);
540         if (err < 0) {
541                 pr_err("Couldn't synthesize config.\n");
542                 return err;
543         }
544
545         return 0;
546 }
547
548 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
549
550 static int __store_counter_ids(struct perf_evsel *counter,
551                                struct cpu_map *cpus,
552                                struct thread_map *threads)
553 {
554         int cpu, thread;
555
556         for (cpu = 0; cpu < cpus->nr; cpu++) {
557                 for (thread = 0; thread < threads->nr; thread++) {
558                         int fd = FD(counter, cpu, thread);
559
560                         if (perf_evlist__id_add_fd(evsel_list, counter,
561                                                    cpu, thread, fd) < 0)
562                                 return -1;
563                 }
564         }
565
566         return 0;
567 }
568
/*
 * Allocate the per-(cpu, thread) id array for @counter and register
 * every fd's id.  Returns 0 on success, -ENOMEM or -1 on failure.
 */
static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter, cpus, threads);
}
579
580 static bool perf_evsel__should_store_id(struct perf_evsel *counter)
581 {
582         return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
583 }
584
/*
 * Core of one stat run: open all counters (with fallback/retry),
 * apply filters and driver configs, optionally write the perf.data
 * header and synthesized config, run/attach to the workload while
 * handling interval printing, then read and close all counters.
 *
 * Returns the workload's exit status, or a negative error.
 */
static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
	struct perf_evsel_config_term *err_term;

	/* Sleep granularity: the interval when given, else 1 second. */
	if (interval) {
		ts.tv_sec  = interval / USEC_PER_MSEC;
		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each_entry(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose > 0)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				/*
				 * Skip only ungrouped events / trivial-group
				 * leaders; a failed member of a real group
				 * falls through to the fatal path below.
				 */
				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				/* A weaker event variant exists: retry the open. */
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		/* Track the widest unit string for column alignment. */
		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;

		if (perf_evsel__should_store_id(counter) &&
		    store_counter_ids(counter))
			return -1;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (STAT_RECORD) {
		int err, fd = perf_data_file__fd(&perf_stat.file);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		/* Poll the child between interval prints until it exits. */
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		waitpid(child_pid, &status, 0);

		if (workload_exec_errno) {
			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	disable_counters();

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/*
	 * Closing a group leader splits the group, and as we only disable
	 * group leaders, results in remaining events becoming enabled. To
	 * avoid arbitrary skew, we must read all counters before closing any
	 * group leaders.
	 */
	read_counters();
	perf_evlist__close(evsel_list);

	return WEXITSTATUS(status);
}
746
747 static int run_perf_stat(int argc, const char **argv)
748 {
749         int ret;
750
751         if (pre_cmd) {
752                 ret = system(pre_cmd);
753                 if (ret)
754                         return ret;
755         }
756
757         if (sync_run)
758                 sync();
759
760         ret = __run_perf_stat(argc, argv);
761         if (ret)
762                 return ret;
763
764         if (post_cmd) {
765                 ret = system(post_cmd);
766                 if (ret)
767                         return ret;
768         }
769
770         return ret;
771 }
772
773 static void print_running(u64 run, u64 ena)
774 {
775         if (csv_output) {
776                 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
777                                         csv_sep,
778                                         run,
779                                         csv_sep,
780                                         ena ? 100.0 * run / ena : 100.0);
781         } else if (run != ena) {
782                 fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
783         }
784 }
785
786 static void print_noise_pct(double total, double avg)
787 {
788         double pct = rel_stddev_stats(total, avg);
789
790         if (csv_output)
791                 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
792         else if (pct)
793                 fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
794 }
795
796 static void print_noise(struct perf_evsel *evsel, double avg)
797 {
798         struct perf_stat_evsel *ps;
799
800         if (run_count == 1)
801                 return;
802
803         ps = evsel->priv;
804         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
805 }
806
/*
 * Print the aggregation-line prefix (socket/core/CPU/thread id and, for
 * socket/core modes, the number of aggregated CPUs) according to the
 * current aggregation mode.  Field widths collapse to 0 in CSV mode.
 */
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
		/* "S<socket>-C<core>" plus the aggregated-CPU count. */
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(stat_config.output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
			break;
	case AGGR_NONE:
		/* Per-CPU: @id indexes into the evsel's cpu map. */
		fprintf(stat_config.output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		/* Per-thread: "comm-pid" from the evsel's thread map. */
		fprintf(stat_config.output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	case AGGR_UNSET:
	default:
		/* Global aggregation has no per-line prefix. */
		break;
	}
}
848
/* Context passed to the print_metric_*/new_line_* output callbacks. */
struct outstate {
	FILE *fh;		/* destination stream */
	bool newline;		/* deferred newline pending (std output) */
	const char *prefix;	/* text printed at the start of each line */
	int  nfields;		/* CSV fields to pad on a continuation line */
	int  id, nr;		/* aggregation id / CPU count for aggr_printout() */
	struct perf_evsel *evsel;	/* event currently being printed */
};

/* Width of the metric column in standard output. */
#define METRIC_LEN  35
859
860 static void new_line_std(void *ctx)
861 {
862         struct outstate *os = ctx;
863
864         os->newline = true;
865 }
866
867 static void do_new_line_std(struct outstate *os)
868 {
869         fputc('\n', os->fh);
870         fputs(os->prefix, os->fh);
871         aggr_printout(os->evsel, os->id, os->nr);
872         if (stat_config.aggr_mode == AGGR_NONE)
873                 fprintf(os->fh, "        ");
874         fprintf(os->fh, "                                                 ");
875 }
876
877 static void print_metric_std(void *ctx, const char *color, const char *fmt,
878                              const char *unit, double val)
879 {
880         struct outstate *os = ctx;
881         FILE *out = os->fh;
882         int n;
883         bool newline = os->newline;
884
885         os->newline = false;
886
887         if (unit == NULL || fmt == NULL) {
888                 fprintf(out, "%-*s", METRIC_LEN, "");
889                 return;
890         }
891
892         if (newline)
893                 do_new_line_std(os);
894
895         n = fprintf(out, " # ");
896         if (color)
897                 n += color_fprintf(out, color, fmt, val);
898         else
899                 n += fprintf(out, fmt, val);
900         fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
901 }
902
903 static void new_line_csv(void *ctx)
904 {
905         struct outstate *os = ctx;
906         int i;
907
908         fputc('\n', os->fh);
909         if (os->prefix)
910                 fprintf(os->fh, "%s%s", os->prefix, csv_sep);
911         aggr_printout(os->evsel, os->id, os->nr);
912         for (i = 0; i < os->nfields; i++)
913                 fputs(csv_sep, os->fh);
914 }
915
916 static void print_metric_csv(void *ctx,
917                              const char *color __maybe_unused,
918                              const char *fmt, const char *unit, double val)
919 {
920         struct outstate *os = ctx;
921         FILE *out = os->fh;
922         char buf[64], *vals, *ends;
923
924         if (unit == NULL || fmt == NULL) {
925                 fprintf(out, "%s%s", csv_sep, csv_sep);
926                 return;
927         }
928         snprintf(buf, sizeof(buf), fmt, val);
929         ends = vals = ltrim(buf);
930         while (isdigit(*ends) || *ends == '.')
931                 ends++;
932         *ends = 0;
933         while (isspace(*unit))
934                 unit++;
935         fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
936 }
937
938 #define METRIC_ONLY_LEN 20
939
940 /* Filter out some columns that don't work well in metrics only mode */
941
/*
 * Metrics-only mode drops columns whose "unit" is really a raw rate
 * or utilization figure rather than a named metric.
 */
static bool valid_only_metric(const char *unit)
{
	static const char * const rejected[] = {
		"/sec", "hz", "Hz", "CPUs utilized",
	};
	size_t i;

	if (!unit)
		return false;
	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
		if (strstr(unit, rejected[i]))
			return false;
	}
	return true;
}
953
/*
 * Disambiguate generic "of all ..." units by prefixing the event name,
 * so a metrics-only header identifies which event a percentage refers
 * to.  @buf must hold at least 1024 bytes (all callers in this file
 * pass 1024-byte buffers).  Returns @buf when rewritten, else @unit.
 */
static const char *fixunit(char *buf, struct perf_evsel *evsel,
			   const char *unit)
{
	if (!strncmp(unit, "of all", 6)) {
		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
			 unit);
		return buf;
	}
	return unit;
}
964
965 static void print_metric_only(void *ctx, const char *color, const char *fmt,
966                               const char *unit, double val)
967 {
968         struct outstate *os = ctx;
969         FILE *out = os->fh;
970         int n;
971         char buf[1024];
972         unsigned mlen = METRIC_ONLY_LEN;
973
974         if (!valid_only_metric(unit))
975                 return;
976         unit = fixunit(buf, os->evsel, unit);
977         if (color)
978                 n = color_fprintf(out, color, fmt, val);
979         else
980                 n = fprintf(out, fmt, val);
981         if (n > METRIC_ONLY_LEN)
982                 n = METRIC_ONLY_LEN;
983         if (mlen < strlen(unit))
984                 mlen = strlen(unit) + 1;
985         fprintf(out, "%*s", mlen - n, "");
986 }
987
988 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
989                                   const char *fmt,
990                                   const char *unit, double val)
991 {
992         struct outstate *os = ctx;
993         FILE *out = os->fh;
994         char buf[64], *vals, *ends;
995         char tbuf[1024];
996
997         if (!valid_only_metric(unit))
998                 return;
999         unit = fixunit(tbuf, os->evsel, unit);
1000         snprintf(buf, sizeof buf, fmt, val);
1001         ends = vals = ltrim(buf);
1002         while (isdigit(*ends) || *ends == '.')
1003                 ends++;
1004         *ends = 0;
1005         fprintf(out, "%s%s", vals, csv_sep);
1006 }
1007
/*
 * In metrics-only mode all metrics share one output line, so new-line
 * requests from the shadow-stat printer are deliberately ignored.
 */
static void new_line_metric(void *ctx __maybe_unused)
{
}
1011
1012 static void print_metric_header(void *ctx, const char *color __maybe_unused,
1013                                 const char *fmt __maybe_unused,
1014                                 const char *unit, double val __maybe_unused)
1015 {
1016         struct outstate *os = ctx;
1017         char tbuf[1024];
1018
1019         if (!valid_only_metric(unit))
1020                 return;
1021         unit = fixunit(tbuf, os->evsel, unit);
1022         if (csv_output)
1023                 fprintf(os->fh, "%s%s", unit, csv_sep);
1024         else
1025                 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
1026 }
1027
1028 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1029 {
1030         FILE *output = stat_config.output;
1031         double msecs = avg / NSEC_PER_MSEC;
1032         const char *fmt_v, *fmt_n;
1033         char name[25];
1034
1035         fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
1036         fmt_n = csv_output ? "%s" : "%-25s";
1037
1038         aggr_printout(evsel, id, nr);
1039
1040         scnprintf(name, sizeof(name), "%s%s",
1041                   perf_evsel__name(evsel), csv_output ? "" : " (msec)");
1042
1043         fprintf(output, fmt_v, msecs, csv_sep);
1044
1045         if (csv_output)
1046                 fprintf(output, "%s%s", evsel->unit, csv_sep);
1047         else
1048                 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
1049
1050         fprintf(output, fmt_n, name);
1051
1052         if (evsel->cgrp)
1053                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
1054 }
1055
1056 static int first_shadow_cpu(struct perf_evsel *evsel, int id)
1057 {
1058         int i;
1059
1060         if (!aggr_get_id)
1061                 return 0;
1062
1063         if (stat_config.aggr_mode == AGGR_NONE)
1064                 return id;
1065
1066         if (stat_config.aggr_mode == AGGR_GLOBAL)
1067                 return 0;
1068
1069         for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
1070                 int cpu2 = perf_evsel__cpus(evsel)->map[i];
1071
1072                 if (aggr_get_id(evsel_list->cpus, cpu2) == id)
1073                         return cpu2;
1074         }
1075         return 0;
1076 }
1077
1078 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1079 {
1080         FILE *output = stat_config.output;
1081         double sc =  evsel->scale;
1082         const char *fmt;
1083
1084         if (csv_output) {
1085                 fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
1086         } else {
1087                 if (big_num)
1088                         fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
1089                 else
1090                         fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
1091         }
1092
1093         aggr_printout(evsel, id, nr);
1094
1095         fprintf(output, fmt, avg, csv_sep);
1096
1097         if (evsel->unit)
1098                 fprintf(output, "%-*s%s",
1099                         csv_output ? 0 : unit_width,
1100                         evsel->unit, csv_sep);
1101
1102         fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
1103
1104         if (evsel->cgrp)
1105                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
1106 }
1107
/*
 * Print one complete counter result line: the value columns (via
 * nsec_printout()/abs_printout()) and then the shadow metric columns
 * through the pm/nl callbacks selected for the current output mode.
 *
 * @id/@nr:  aggregation id and matched-CPU count (see aggr_printout())
 * @uval:    counter value already multiplied by the event's scale
 * @prefix:  interval timestamp prefix, or NULL
 * @run/@ena: running/enabled times; 0 means the counter did not count
 * @noise:   value handed to print_noise() for the stddev column
 */
static void printout(int id, int nr, struct perf_evsel *counter, double uval,
		     char *prefix, u64 run, u64 ena, double noise)
{
	struct perf_stat_output_ctx out;
	struct outstate os = {
		.fh = stat_config.output,
		.prefix = prefix ? prefix : "",
		.id = id,
		.nr = nr,
		.evsel = counter,
	};
	print_metric_t pm = print_metric_std;
	void (*nl)(void *);

	/* pick the metric-print and new-line callbacks for this mode */
	if (metric_only) {
		nl = new_line_metric;
		if (csv_output)
			pm = print_metric_only_csv;
		else
			pm = print_metric_only;
	} else
		nl = new_line_std;

	if (csv_output && !metric_only) {
		/* CSV columns added by aggr_printout() per aggregation mode */
		static int aggr_fields[] = {
			[AGGR_GLOBAL] = 0,
			[AGGR_THREAD] = 1,
			[AGGR_NONE] = 1,
			[AGGR_SOCKET] = 2,
			[AGGR_CORE] = 2,
		};

		pm = print_metric_csv;
		nl = new_line_csv;
		/* value, unit and event-name columns precede the metrics */
		os.nfields = 3;
		os.nfields += aggr_fields[stat_config.aggr_mode];
		if (counter->cgrp)
			os.nfields++;
	}
	/* counter never ran or could not be scaled: print placeholders */
	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
		if (metric_only) {
			pm(&os, NULL, "", "", 0);
			return;
		}
		aggr_printout(counter, id, nr);

		fprintf(stat_config.output, "%*s%s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep);

		/* supported but did not count: suggest freeing a counter */
		if (counter->supported)
			print_free_counters_hint = 1;

		fprintf(stat_config.output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);

		fprintf(stat_config.output, "%*s",
			csv_output ? 0 : -25,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(stat_config.output, "%s%s",
				csv_sep, counter->cgrp->name);

		/* column order differs: metric padding goes before
		 * noise/running in std mode, after them in CSV mode */
		if (!csv_output)
			pm(&os, NULL, NULL, "", 0);
		print_noise(counter, noise);
		print_running(run, ena);
		if (csv_output)
			pm(&os, NULL, NULL, "", 0);
		return;
	}

	/* metric_only lines carry no value column of their own */
	if (metric_only)
		/* nothing */;
	else if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);

	out.print_metric = pm;
	out.new_line = nl;
	out.ctx = &os;
	out.force_header = false;

	/* CSV places noise/running before the metric columns */
	if (csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}

	perf_stat__print_shadow_stats(counter, uval,
				first_shadow_cpu(counter, id),
				&out);
	if (!csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}
}
1208
1209 static void aggr_update_shadow(void)
1210 {
1211         int cpu, s2, id, s;
1212         u64 val;
1213         struct perf_evsel *counter;
1214
1215         for (s = 0; s < aggr_map->nr; s++) {
1216                 id = aggr_map->map[s];
1217                 evlist__for_each_entry(evsel_list, counter) {
1218                         val = 0;
1219                         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1220                                 s2 = aggr_get_id(evsel_list->cpus, cpu);
1221                                 if (s2 != id)
1222                                         continue;
1223                                 val += perf_counts(counter->counts, cpu, 0)->val;
1224                         }
1225                         val = val * counter->scale;
1226                         perf_stat__update_shadow_stats(counter, &val,
1227                                                        first_shadow_cpu(counter, id));
1228                 }
1229         }
1230 }
1231
1232 static void collect_all_aliases(struct perf_evsel *counter,
1233                             void (*cb)(struct perf_evsel *counter, void *data,
1234                                        bool first),
1235                             void *data)
1236 {
1237         struct perf_evsel *alias;
1238
1239         alias = list_prepare_entry(counter, &(evsel_list->entries), node);
1240         list_for_each_entry_continue (alias, &evsel_list->entries, node) {
1241                 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
1242                     alias->scale != counter->scale ||
1243                     alias->cgrp != counter->cgrp ||
1244                     strcmp(alias->unit, counter->unit) ||
1245                     nsec_counter(alias) != nsec_counter(counter))
1246                         break;
1247                 alias->merged_stat = true;
1248                 cb(alias, data, false);
1249         }
1250 }
1251
1252 static bool collect_data(struct perf_evsel *counter,
1253                             void (*cb)(struct perf_evsel *counter, void *data,
1254                                        bool first),
1255                             void *data)
1256 {
1257         if (counter->merged_stat)
1258                 return false;
1259         cb(counter, data, true);
1260         if (!no_merge && counter->auto_merge_stats)
1261                 collect_all_aliases(counter, cb, data);
1262         return true;
1263 }
1264
/* Accumulator handed to the collect_data() callbacks */
struct aggr_data {
	u64 ena, run, val;	/* summed enabled/running times and raw value */
	int id;			/* aggregation id to match (aggr_cb) */
	int nr;			/* number of CPUs that matched id */
	int cpu;		/* fixed CPU index (counter_cb) */
};
1271
/*
 * collect_data() callback for socket/core aggregation: sum the counts
 * of every CPU whose aggregation id matches ad->id into ad.
 */
static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
{
	struct aggr_data *ad = data;
	int cpu, s2;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		struct perf_counts_values *counts;

		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
		if (s2 != ad->id)
			continue;
		/* count matching CPUs once, on the primary (non-alias) pass */
		if (first)
			ad->nr++;
		counts = perf_counts(counter->counts, cpu, 0);
		/*
		 * When any result is bad, make them all to give
		 * consistent output in interval mode.
		 */
		if (counts->ena == 0 || counts->run == 0 ||
		    counter->counts->scaled == -1) {
			ad->ena = 0;
			ad->run = 0;
			break;
		}
		ad->val += counts->val;
		ad->ena += counts->ena;
		ad->run += counts->run;
	}
}
1301
/*
 * Print results aggregated per socket or per core: one line per
 * (aggregation id, event) pair normally, or one combined line per id
 * when metric_only is set.
 */
static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int s, id, nr;
	double uval;
	u64 ena, run, val;
	bool first;

	if (!(aggr_map || aggr_get_id))
		return;

	aggr_update_shadow();

	/*
	 * With metric_only everything is on a single line.
	 * Without each counter has its own line.
	 */
	for (s = 0; s < aggr_map->nr; s++) {
		struct aggr_data ad;
		if (prefix && metric_only)
			fprintf(output, "%s", prefix);

		ad.id = id = aggr_map->map[s];
		first = true;
		evlist__for_each_entry(evsel_list, counter) {
			ad.val = ad.ena = ad.run = 0;
			ad.nr = 0;
			/* skip counters already merged into an alias */
			if (!collect_data(counter, aggr_cb, &ad))
				continue;
			nr = ad.nr;
			ena = ad.ena;
			run = ad.run;
			val = ad.val;
			/* the id column is emitted once per combined line */
			if (first && metric_only) {
				first = false;
				aggr_printout(counter, id, nr);
			}
			if (prefix && !metric_only)
				fprintf(output, "%s", prefix);

			uval = val * counter->scale;
			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
			if (!metric_only)
				fputc('\n', output);
		}
		if (metric_only)
			fputc('\n', output);
	}
}
1352
1353 static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
1354 {
1355         FILE *output = stat_config.output;
1356         int nthreads = thread_map__nr(counter->threads);
1357         int ncpus = cpu_map__nr(counter->cpus);
1358         int cpu, thread;
1359         double uval;
1360
1361         for (thread = 0; thread < nthreads; thread++) {
1362                 u64 ena = 0, run = 0, val = 0;
1363
1364                 for (cpu = 0; cpu < ncpus; cpu++) {
1365                         val += perf_counts(counter->counts, cpu, thread)->val;
1366                         ena += perf_counts(counter->counts, cpu, thread)->ena;
1367                         run += perf_counts(counter->counts, cpu, thread)->run;
1368                 }
1369
1370                 if (prefix)
1371                         fprintf(output, "%s", prefix);
1372
1373                 uval = val * counter->scale;
1374                 printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
1375                 fputc('\n', output);
1376         }
1377 }
1378
/* Sums of per-evsel averaged stats, used for AGGR_GLOBAL printing */
struct caggr_data {
	double avg, avg_enabled, avg_running;
};
1382
/*
 * collect_data() callback for global aggregation: add this evsel's
 * averaged stats into the caggr_data sums.
 */
static void counter_aggr_cb(struct perf_evsel *counter, void *data,
			    bool first __maybe_unused)
{
	struct caggr_data *cd = data;
	struct perf_stat_evsel *ps = counter->priv;

	/* res_stats[]: [0] value, [1] enabled time, [2] running time */
	cd->avg += avg_stats(&ps->res_stats[0]);
	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
	cd->avg_running += avg_stats(&ps->res_stats[2]);
}
1393
1394 /*
1395  * Print out the results of a single counter:
1396  * aggregated counts in system-wide mode
1397  */
1398 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1399 {
1400         FILE *output = stat_config.output;
1401         double uval;
1402         struct caggr_data cd = { .avg = 0.0 };
1403
1404         if (!collect_data(counter, counter_aggr_cb, &cd))
1405                 return;
1406
1407         if (prefix && !metric_only)
1408                 fprintf(output, "%s", prefix);
1409
1410         uval = cd.avg * counter->scale;
1411         printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg);
1412         if (!metric_only)
1413                 fprintf(output, "\n");
1414 }
1415
1416 static void counter_cb(struct perf_evsel *counter, void *data,
1417                        bool first __maybe_unused)
1418 {
1419         struct aggr_data *ad = data;
1420
1421         ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
1422         ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
1423         ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
1424 }
1425
1426 /*
1427  * Print out the results of a single counter:
1428  * does not use aggregated count in system-wide
1429  */
1430 static void print_counter(struct perf_evsel *counter, char *prefix)
1431 {
1432         FILE *output = stat_config.output;
1433         u64 ena, run, val;
1434         double uval;
1435         int cpu;
1436
1437         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1438                 struct aggr_data ad = { .cpu = cpu };
1439
1440                 if (!collect_data(counter, counter_cb, &ad))
1441                         return;
1442                 val = ad.val;
1443                 ena = ad.ena;
1444                 run = ad.run;
1445
1446                 if (prefix)
1447                         fprintf(output, "%s", prefix);
1448
1449                 uval = val * counter->scale;
1450                 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
1451
1452                 fputc('\n', output);
1453         }
1454 }
1455
1456 static void print_no_aggr_metric(char *prefix)
1457 {
1458         int cpu;
1459         int nrcpus = 0;
1460         struct perf_evsel *counter;
1461         u64 ena, run, val;
1462         double uval;
1463
1464         nrcpus = evsel_list->cpus->nr;
1465         for (cpu = 0; cpu < nrcpus; cpu++) {
1466                 bool first = true;
1467
1468                 if (prefix)
1469                         fputs(prefix, stat_config.output);
1470                 evlist__for_each_entry(evsel_list, counter) {
1471                         if (first) {
1472                                 aggr_printout(counter, cpu, 0);
1473                                 first = false;
1474                         }
1475                         val = perf_counts(counter->counts, cpu, 0)->val;
1476                         ena = perf_counts(counter->counts, cpu, 0)->ena;
1477                         run = perf_counts(counter->counts, cpu, 0)->run;
1478
1479                         uval = val * counter->scale;
1480                         printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
1481                 }
1482                 fputc('\n', stat_config.output);
1483         }
1484 }
1485
/*
 * Width of the aggregation prefix aggr_printout() emits in each mode;
 * used to indent the metrics-only header line so columns line up.
 */
static int aggr_header_lens[] = {
	[AGGR_CORE] = 18,
	[AGGR_SOCKET] = 12,
	[AGGR_NONE] = 6,
	[AGGR_THREAD] = 24,
	[AGGR_GLOBAL] = 0,
};
1493
1494 static const char *aggr_header_csv[] = {
1495         [AGGR_CORE]     =       "core,cpus,",
1496         [AGGR_SOCKET]   =       "socket,cpus",
1497         [AGGR_NONE]     =       "cpu,",
1498         [AGGR_THREAD]   =       "comm-pid,",
1499         [AGGR_GLOBAL]   =       ""
1500 };
1501
1502 static void print_metric_headers(const char *prefix, bool no_indent)
1503 {
1504         struct perf_stat_output_ctx out;
1505         struct perf_evsel *counter;
1506         struct outstate os = {
1507                 .fh = stat_config.output
1508         };
1509
1510         if (prefix)
1511                 fprintf(stat_config.output, "%s", prefix);
1512
1513         if (!csv_output && !no_indent)
1514                 fprintf(stat_config.output, "%*s",
1515                         aggr_header_lens[stat_config.aggr_mode], "");
1516         if (csv_output) {
1517                 if (stat_config.interval)
1518                         fputs("time,", stat_config.output);
1519                 fputs(aggr_header_csv[stat_config.aggr_mode],
1520                         stat_config.output);
1521         }
1522
1523         /* Print metrics headers only */
1524         evlist__for_each_entry(evsel_list, counter) {
1525                 os.evsel = counter;
1526                 out.ctx = &os;
1527                 out.print_metric = print_metric_header;
1528                 out.new_line = new_line_metric;
1529                 out.force_header = true;
1530                 os.evsel = counter;
1531                 perf_stat__print_shadow_stats(counter, 0,
1532                                               0,
1533                                               &out);
1534         }
1535         fputc('\n', stat_config.output);
1536 }
1537
/*
 * Fill @prefix with the interval timestamp and, on the first of every
 * 25 intervals, (re)print the column header for the current
 * aggregation mode.
 */
static void print_interval(char *prefix, struct timespec *ts)
{
	FILE *output = stat_config.output;
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		switch (stat_config.aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU");
			if (!metric_only)
				fprintf(output, "                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid");
			if (!metric_only)
				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
			/* fall through */
		case AGGR_UNSET:
			break;
		}
	}

	if (num_print_interval == 0 && metric_only)
		print_metric_headers(" ", true);
	/* header is reprinted every 25 intervals */
	if (++num_print_interval == 25)
		num_print_interval = 0;
}
1582
1583 static void print_header(int argc, const char **argv)
1584 {
1585         FILE *output = stat_config.output;
1586         int i;
1587
1588         fflush(stdout);
1589
1590         if (!csv_output) {
1591                 fprintf(output, "\n");
1592                 fprintf(output, " Performance counter stats for ");
1593                 if (target.system_wide)
1594                         fprintf(output, "\'system wide");
1595                 else if (target.cpu_list)
1596                         fprintf(output, "\'CPU(s) %s", target.cpu_list);
1597                 else if (!target__has_task(&target)) {
1598                         fprintf(output, "\'%s", argv ? argv[0] : "pipe");
1599                         for (i = 1; argv && (i < argc); i++)
1600                                 fprintf(output, " %s", argv[i]);
1601                 } else if (target.pid)
1602                         fprintf(output, "process id \'%s", target.pid);
1603                 else
1604                         fprintf(output, "thread id \'%s", target.tid);
1605
1606                 fprintf(output, "\'");
1607                 if (run_count > 1)
1608                         fprintf(output, " (%d runs)", run_count);
1609                 fprintf(output, ":\n\n");
1610         }
1611 }
1612
1613 static void print_footer(void)
1614 {
1615         FILE *output = stat_config.output;
1616         int n;
1617
1618         if (!null_run)
1619                 fprintf(output, "\n");
1620         fprintf(output, " %17.9f seconds time elapsed",
1621                         avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
1622         if (run_count > 1) {
1623                 fprintf(output, "                                        ");
1624                 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1625                                 avg_stats(&walltime_nsecs_stats));
1626         }
1627         fprintf(output, "\n\n");
1628
1629         if (print_free_counters_hint &&
1630             sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
1631             n > 0)
1632                 fprintf(output,
1633 "Some events weren't counted. Try disabling the NMI watchdog:\n"
1634 "       echo 0 > /proc/sys/kernel/nmi_watchdog\n"
1635 "       perf stat ...\n"
1636 "       echo 1 > /proc/sys/kernel/nmi_watchdog\n");
1637 }
1638
/*
 * Top-level output entry point, called once per interval or once at
 * the end of a run: prints the header or interval prefix, dispatches
 * on the aggregation mode, and appends the footer for final output.
 */
static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	int interval = stat_config.interval;
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	/* Do not print anything if we record to the pipe. */
	if (STAT_RECORD && perf_stat.file.is_pipe)
		return;

	/* interval mode prefixes every line with a timestamp */
	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);

	if (metric_only) {
		static int num_print_iv;

		/* reprint the metric header every 25 invocations */
		if (num_print_iv == 0 && !interval)
			print_metric_headers(prefix, false);
		if (num_print_iv++ == 25)
			num_print_iv = 0;
		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
			fprintf(stat_config.output, "%s", prefix);
	}

	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each_entry(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
	case AGGR_GLOBAL:
		evlist__for_each_entry(evsel_list, counter)
			print_counter_aggr(counter, prefix);
		if (metric_only)
			fputc('\n', stat_config.output);
		break;
	case AGGR_NONE:
		if (metric_only)
			print_no_aggr_metric(prefix);
		else {
			evlist__for_each_entry(evsel_list, counter)
				print_counter(counter, prefix);
		}
		break;
	case AGGR_UNSET:
	default:
		break;
	}

	if (!interval && !csv_output)
		print_footer();

	fflush(stat_config.output);
}
1698
/* signal caught by skip_signal(), re-raised in sig_atexit(); -1 = none */
static volatile int signr = -1;
1700
/*
 * Signal handler: ask the main loop to stop when there is no workload
 * child (or in interval mode), and remember the signal number so
 * sig_atexit() can re-raise it with the default disposition.
 */
static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
}
1715
/*
 * Exit hook: terminate a still-running child workload, then, if we are
 * exiting because of a signal, restore its default disposition and
 * re-raise it so the process reports the real termination cause.
 */
static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	/* re-raise the recorded signal with its default handler */
	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
1741
1742 static int stat__set_big_num(const struct option *opt __maybe_unused,
1743                              const char *s __maybe_unused, int unset)
1744 {
1745         big_num_opt = unset ? 0 : 1;
1746         return 0;
1747 }
1748
1749 static int enable_metric_only(const struct option *opt __maybe_unused,
1750                               const char *s __maybe_unused, int unset)
1751 {
1752         force_metric_only = true;
1753         metric_only = !unset;
1754         return 0;
1755 }
1756
/* Command line options shared by 'perf stat' and 'perf stat record'. */
static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	/* target selection: pid/tid/system-wide/cpu list */
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	/* output destination and pre/post hooks */
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		    "print counts at regular interval in ms (>= 10)"),
	/* aggregation modes (mutually exclusive, last one wins) */
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
			"Only print computed metrics. No raw values", enable_metric_only),
	OPT_BOOLEAN(0, "topdown", &topdown_run,
			"measure topdown level 1 statistics"),
	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
			"measure SMI cost"),
	OPT_END()
};
1824
/* Thin adapter over cpu_map__get_socket() with no user data. */
static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}
1829
/* Thin adapter over cpu_map__get_core() with no user data. */
static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}
1834
1835 static int cpu_map__get_max(struct cpu_map *map)
1836 {
1837         int i, max = -1;
1838
1839         for (i = 0; i < map->nr; i++) {
1840                 if (map->map[i] > max)
1841                         max = map->map[i];
1842         }
1843
1844         return max;
1845 }
1846
/* Per-CPU cache of aggregation ids, filled lazily by perf_stat__get_aggr(). */
static struct cpu_map *cpus_aggr_map;
1848
/*
 * Memoizing lookup of an aggregation id: the result of get_id() is
 * cached per CPU in cpus_aggr_map so repeated queries for the same CPU
 * do not recompute it.  Returns -1 for an out-of-range index.
 */
static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	/* -1 marks a cache slot that has not been resolved yet */
	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}
1863
/* Cached socket-id lookup used as aggr_get_id for --per-socket. */
static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}
1868
/* Cached core-id lookup used as aggr_get_id for --per-core. */
static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}
1873
/*
 * Set up aggregation for live counting: build the socket/core map for
 * the selected mode, pick the matching (cached) id lookup, and allocate
 * the per-CPU cache consulted by perf_stat__get_aggr().
 * Returns 0 on success, -1/-ENOMEM on failure.
 */
static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		/* no per-socket/per-core translation needed */
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}
1910
1911 static void perf_stat__exit_aggr_mode(void)
1912 {
1913         cpu_map__put(aggr_map);
1914         cpu_map__put(cpus_aggr_map);
1915         aggr_map = NULL;
1916         cpus_aggr_map = NULL;
1917 }
1918
1919 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
1920 {
1921         int cpu;
1922
1923         if (idx > map->nr)
1924                 return -1;
1925
1926         cpu = map->map[idx];
1927
1928         if (cpu >= env->nr_cpus_avail)
1929                 return -1;
1930
1931         return cpu;
1932 }
1933
1934 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
1935 {
1936         struct perf_env *env = data;
1937         int cpu = perf_env__get_cpu(env, map, idx);
1938
1939         return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
1940 }
1941
1942 static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
1943 {
1944         struct perf_env *env = data;
1945         int core = -1, cpu = perf_env__get_cpu(env, map, idx);
1946
1947         if (cpu != -1) {
1948                 int socket_id = env->cpu[cpu].socket_id;
1949
1950                 /*
1951                  * Encode socket in upper 16 bits
1952                  * core_id is relative to socket, and
1953                  * we need a global id. So we combine
1954                  * socket + core id.
1955                  */
1956                 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
1957         }
1958
1959         return core;
1960 }
1961
/* Build a socket map from recorded (perf.data) topology, not sysfs. */
static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
				      struct cpu_map **sockp)
{
	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
1967
/* Build a core map from recorded (perf.data) topology, not sysfs. */
static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
				    struct cpu_map **corep)
{
	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}
1973
/* aggr_get_id for 'perf stat report': socket id from the session's env. */
static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
{
	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}
1978
/* aggr_get_id for 'perf stat report': core id from the session's env. */
static int perf_stat__get_core_file(struct cpu_map *map, int idx)
{
	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}
1983
/*
 * Set up aggregation for 'perf stat report': like
 * perf_stat_init_aggr_mode(), but topology comes from the recorded
 * session header instead of the running system.  Returns 0 or -1.
 */
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
	struct perf_env *env = &st->session->header.env;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_file;
		break;
	case AGGR_CORE:
		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_file;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		/* no per-socket/per-core translation needed */
		break;
	}

	return 0;
}
2013
/*
 * Keep only the topdown events the "cpu" PMU actually provides and
 * build the matching parse_events() string in *str, comma separated and
 * wrapped in {} when @use_group is set.  @attr is compacted in place
 * and remains NULL terminated.  Returns 0 on success, -1 when the
 * string allocation fails.
 */
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int src;
	int kept = 0;
	int len = 0;
	char *p;

	/* Compact @attr in place, dropping unsupported events. */
	for (src = 0; attr[src]; src++) {
		if (!pmu_have_event("cpu", attr[src]))
			continue;
		len += strlen(attr[src]) + 1;
		attr[kept++] = attr[src];
	}
	attr[kept] = NULL;

	/* +1 for the NUL, +2 for the optional '{' '}' pair. */
	*str = malloc(len + 1 + 2);
	if (!*str)
		return -1;
	p = *str;
	if (kept == 0) {
		*p = 0;
		return 0;
	}
	if (use_group)
		*p++ = '{';
	for (src = 0; attr[src]; src++) {
		strcpy(p, attr[src]);
		p += strlen(p);
		*p++ = ',';
	}
	/* the trailing comma becomes '}' or the terminating NUL */
	if (use_group) {
		p[-1] = '}';
		*p = 0;
	} else
		p[-1] = 0;
	return 0;
}
2052
/*
 * Weak default: architectures that support topdown event grouping
 * override this.  Default says "don't group" and raises no warning.
 */
__weak bool arch_topdown_check_group(bool *warn)
{
	*warn = false;
	return false;
}
2058
/* Weak default: no architecture-specific topdown grouping warning. */
__weak void arch_topdown_group_warn(void)
{
}
2062
2063 /*
2064  * Add default attributes, if there were no attributes specified or
2065  * if -d/--detailed, -d -d or -d -d -d is used:
2066  */
static int add_default_attributes(void)
{
	int err;
	/* Baseline set: software counters plus cycles. */
	struct perf_event_attr default_attrs0[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
};
	/* Added only when the PMU advertises the corresponding event. */
	struct perf_event_attr frontend_attrs[] = {
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
};
	struct perf_event_attr backend_attrs[] = {
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
};
	struct perf_event_attr default_attrs1[] = {
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	/* -T: replace the default set with transaction events. */
	if (transaction_run) {
		struct parse_events_error errinfo;

		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs,
					   &errinfo);
		else
			err = parse_events(evsel_list,
					   transaction_limited_attrs,
					   &errinfo);
		/* NOTE(review): errinfo is filled by parse_events() but never reported */
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	/* --smi-cost: needs freeze_on_smi plus the msr PMU events. */
	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			/* remember to restore the sysfs setting on exit */
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, NULL);
		} else {
			fprintf(stderr, "To measure SMI cost, it needs "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	/* --topdown: level-1 topdown events, forced into per-core mode. */
	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
				arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, NULL);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
				free(str);
				return -1;
			}
		} else {
			fprintf(stderr, "System does not support topdown\n");
			return -1;
		}
		free(str);
	}

	/* No -e given: install the default counter set. */
	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
						frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
						backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
2320
/* Usage string for the 'perf stat record' subcommand. */
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};
2325
2326 static void init_features(struct perf_session *session)
2327 {
2328         int feat;
2329
2330         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
2331                 perf_header__set_feat(&session->header, feat);
2332
2333         perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2334         perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2335         perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2336         perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2337 }
2338
/*
 * 'perf stat record': parse record-specific options and create the
 * output session that counter data will be written to.  Returns the
 * number of remaining (workload) arguments, or -1 on error.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data_file *file = &perf_stat.file;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		file->path = output_name;

	/* repeated runs cannot be represented in a single perf.data */
	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist   = evsel_list;
	perf_stat.session = session;
	perf_stat.record  = true;
	return argc;
}
2368
/*
 * 'perf stat report' handler for STAT_ROUND events: aggregate the
 * counters received so far and print one round of output.  The FINAL
 * round's timestamp feeds the wall-clock statistics.
 */
static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each_entry(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	/* in interval mode, pass the round time for the interval header */
	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}
2394
/*
 * 'perf stat report' handler for STAT_CONFIG events: adopt the recorded
 * stat configuration and (re)initialize aggregation for it.
 */
static
int process_stat_config_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	/* empty cpu map: task data, no CPU aggregation to set up */
	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	/* a mode given on the report command line wins over the recorded one */
	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	/* pipe input has no header env, so use live system topology */
	if (perf_stat.file.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}
2420
/*
 * Once both the cpu map and thread map have arrived (in either order),
 * install them on evsel_list and allocate counter stats storage.
 * Returns 0 while one map is still missing, -EINVAL on a duplicate
 * setup, -ENOMEM if allocation fails.
 */
static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}
2437
2438 static
2439 int process_thread_map_event(struct perf_tool *tool,
2440                              union perf_event *event,
2441                              struct perf_session *session __maybe_unused)
2442 {
2443         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2444
2445         if (st->threads) {
2446                 pr_warning("Extra thread map event, ignoring.\n");
2447                 return 0;
2448         }
2449
2450         st->threads = thread_map__new_event(&event->thread_map);
2451         if (!st->threads)
2452                 return -ENOMEM;
2453
2454         return set_maps(st);
2455 }
2456
2457 static
2458 int process_cpu_map_event(struct perf_tool *tool,
2459                           union perf_event *event,
2460                           struct perf_session *session __maybe_unused)
2461 {
2462         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2463         struct cpu_map *cpus;
2464
2465         if (st->cpus) {
2466                 pr_warning("Extra cpu map event, ignoring.\n");
2467                 return 0;
2468         }
2469
2470         cpus = cpu_map__new_data(&event->cpu_map.data);
2471         if (!cpus)
2472                 return -ENOMEM;
2473
2474         st->cpus = cpus;
2475         return set_maps(st);
2476 }
2477
/* Usage string for the 'perf stat report' subcommand. */
static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};
2482
/* Tool callbacks and shared state for 'perf stat record/report'. */
static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,	/* may be overridden by report options */
};
2495
/*
 * 'perf stat report': replay a recorded perf.data file; the actual
 * output happens from the tool callbacks while processing events.
 */
static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	/* default input: stdin when it is a pipe, otherwise perf.data */
	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.file.path = input_name;
	perf_stat.file.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session  = session;
	stat_config.output = stderr;
	evsel_list         = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		/*
		 * NOTE(review): session is not deleted on this error path
		 * (and evsel_list still points into it) — looks like a leak
		 * tolerated because the process exits shortly; confirm.
		 */
		return ret;

	perf_session__delete(session);
	return 0;
}
2539
2540 static void setup_system_wide(int forks)
2541 {
2542         /*
2543          * Make system wide (-a) the default target if
2544          * no target was specified and one of following
2545          * conditions is met:
2546          *
2547          *   - there's no workload specified
2548          *   - there is workload specified but all requested
2549          *     events are system wide events
2550          */
2551         if (!target__none(&target))
2552                 return;
2553
2554         if (!forks)
2555                 target.system_wide = true;
2556         else {
2557                 struct perf_evsel *counter;
2558
2559                 evlist__for_each_entry(evsel_list, counter) {
2560                         if (!counter->system_wide)
2561                                 return;
2562                 }
2563
2564                 if (evsel_list->nr_entries)
2565                         target.system_wide = true;
2566         }
2567 }
2568
/*
 * Entry point for 'perf stat': parse options, dispatch to the record/report
 * subcommands, set up the output stream, validate option combinations,
 * then run the workload/counting loop run_count times (forever if -r 0)
 * and print the counters.  Returns 0 on success or a negative errno-style
 * value / -1 on failure.
 */
int cmd_stat(int argc, const char **argv)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	/* Stop at the first non-option so the workload command is preserved. */
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__collect_metric_expr(evsel_list);
	perf_stat__init_shadow_stats();

	/* -x SEP implies CSV output; "\t" on the command line means a real tab. */
	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/* Dispatch 'perf stat record' / 'perf stat report' subcommands. */
	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;

	/*
	 * For record command the -o is already taken care of.
	 * NULL here marks "needs fopen(output_name)" further below.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	/* --output and --log-fd are mutually exclusive destinations. */
	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (metric_only && run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	/* Open the requested output destination (file path or inherited fd). */
	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		/* Timestamp header so logs from repeated runs can be told apart. */
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	setup_system_wide(argc);

	/* -r 0 means "repeat forever"; still run in batches of one. */
	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}

	/* --per-thread only makes sense when specific tasks are monitored. */
	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	/* Fill in the default event set when none was given with -e. */
	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	/* Sanity-check -I: reject < 10ms, warn below 100ms (high overhead). */
	if (interval && interval < 100) {
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
			parse_options_usage(stat_usage, stat_options, "I", 1);
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	/* Main measurement loop: -r N runs, or endlessly with -r 0. */
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose > 0)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		/* Later runs must not see the raw counts of the previous one. */
		if (run_idx != 0)
			perf_evlist__reset_prev_raw_counts(evsel_list);

		status = run_perf_stat(argc, argv);
		if (forever && status != -1 && !interval) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	/* With -I the counters were already printed per interval. */
	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instear provide
		 * a saner message about no samples being in the perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific PERF_RECORD_
		 * records, but the need to suppress the kptr_restrict messages in older
		 * tools remain  -acme
		 */
		int fd = perf_data_file__fd(&perf_stat.file);
		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							     process_synthesized_event,
							     &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file\n.");
		}

		/* Interval mode already emitted per-interval round events. */
		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}

		/* A real file needs its header rewritten with the final sizes. */
		if (!perf_stat.file.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	/* Restore the SMI freeze MSR if --smi-cost toggled it on. */
	if (smi_cost && smi_reset)
		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);

	perf_evlist__delete(evsel_list);
	return status;
}