// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"

enum {
        CTX_BIT_USER    = 1 << 0,
        CTX_BIT_KERNEL  = 1 << 1,
        CTX_BIT_HV      = 1 << 2,
        CTX_BIT_HOST    = 1 << 3,
        CTX_BIT_IDLE    = 1 << 4,
        CTX_BIT_MAX     = 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;

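/*
 * Cache of per-event statistics, keyed by (evsel, cpu, ctx) and stored in
 * the runtime_saved_values rblist. Events flagged with ->collect_stat have
 * their running averages saved here so MetricExpr formulas can reference
 * them later when the metrics are printed.
 */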
struct saved_value {
        struct rb_node rb_node;
        struct perf_evsel *evsel;
        int cpu;
        int ctx;
        struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
        struct saved_value *a = container_of(rb_node,
                                             struct saved_value,
                                             rb_node);
        const struct saved_value *b = entry;

        if (a->ctx != b->ctx)
                return a->ctx - b->ctx;
        if (a->cpu != b->cpu)
                return a->cpu - b->cpu;
        if (a->evsel == b->evsel)
                return 0;
        if ((char *)a->evsel < (char *)b->evsel)
                return -1;
        return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                                       const void *entry)
{
        struct saved_value *nd = malloc(sizeof(struct saved_value));

        if (!nd)
                return NULL;
        memcpy(nd, entry, sizeof(struct saved_value));
        return &nd->rb_node;
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
                                              int cpu, int ctx,
                                              bool create)
{
        struct rb_node *nd;
        struct saved_value dm = {
                .cpu = cpu,
                .ctx = ctx,
                .evsel = evsel,
        };
        nd = rblist__find(&runtime_saved_values, &dm);
        if (nd)
                return container_of(nd, struct saved_value, rb_node);
        if (create) {
                rblist__add_node(&runtime_saved_values, &dm);
                nd = rblist__find(&runtime_saved_values, &dm);
                if (nd)
                        return container_of(nd, struct saved_value, rb_node);
        }
        return NULL;
}

void perf_stat__init_shadow_stats(void)
{
        have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
        rblist__init(&runtime_saved_values);
        runtime_saved_values.node_cmp = saved_value_cmp;
        runtime_saved_values.node_new = saved_value_new;
        /* No delete for now */
}

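/*
 * Map an event's exclude_* bits to a context index, so that events counting
 * different privilege domains keep separate shadow stats. For example, an
 * event opened with only exclude_user set gets ctx == CTX_BIT_USER and never
 * shares a stats slot with an unrestricted event (ctx == 0).
 */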
static int evsel_context(struct perf_evsel *evsel)
{
        int ctx = 0;

        if (evsel->attr.exclude_kernel)
                ctx |= CTX_BIT_KERNEL;
        if (evsel->attr.exclude_user)
                ctx |= CTX_BIT_USER;
        if (evsel->attr.exclude_hv)
                ctx |= CTX_BIT_HV;
        if (evsel->attr.exclude_host)
                ctx |= CTX_BIT_HOST;
        if (evsel->attr.exclude_idle)
                ctx |= CTX_BIT_IDLE;

        return ctx;
}

void perf_stat__reset_shadow_stats(void)
{
        struct rb_node *pos, *next;

        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
        memset(runtime_cycles_in_tx_stats, 0, sizeof(runtime_cycles_in_tx_stats));
        memset(runtime_transaction_stats, 0, sizeof(runtime_transaction_stats));
        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
        memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
        memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
        memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
        memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
        memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
        memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
        memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

        next = rb_first(&runtime_saved_values.entries);
        while (next) {
                pos = next;
                next = rb_next(pos);
                memset(&container_of(pos, struct saved_value, rb_node)->stats,
                       0,
                       sizeof(struct stats));
        }
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                                    int cpu)
{
        int ctx = evsel_context(counter);

        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
            perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
                update_stats(&runtime_nsecs_stats[cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
                update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TRANSACTION_START))
                update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
                update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
                update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
                update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
                update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, SMI_NUM))
                update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, APERF))
                update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);

        if (counter->collect_stat) {
                struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
                                                           true);
                update_stats(&v->stats, count[0]);
        }
}

/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};

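/*
 * Pick a warning color by comparing the ratio against per-type thresholds:
 * above the first threshold prints red, above the second magenta, above the
 * third yellow. For example, a 35% frontend-stall ratio (thresholds
 * 50/30/10) prints magenta.
 */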
static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
                                                const char *name)
{
        struct perf_evsel *c2;

        evlist__for_each_entry (evsel_list, c2) {
                if (!strcasecmp(c2->name, name) && !c2->collect_stat)
                        return c2;
        }
        return NULL;
}

/* Mark the events that MetricExpr formulas reference, and link the events
 * that use a formula to those referenced events.
 */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
        struct perf_evsel *counter, *leader, **metric_events, *oc;
        bool found;
        const char **metric_names;
        int i;
        int num_metric_names;

        evlist__for_each_entry(evsel_list, counter) {
                bool invalid = false;

                leader = counter->leader;
                if (!counter->metric_expr)
                        continue;
                metric_events = counter->metric_events;
                if (!metric_events) {
                        if (expr__find_other(counter->metric_expr, counter->name,
                                             &metric_names, &num_metric_names) < 0)
                                continue;

                        metric_events = calloc(num_metric_names + 1,
                                               sizeof(struct perf_evsel *));
                        if (!metric_events)
                                return;
                        counter->metric_events = metric_events;
                }

                for (i = 0; i < num_metric_names; i++) {
                        found = false;
                        if (leader) {
                                /* Search in group */
                                for_each_group_member (oc, leader) {
                                        if (!strcasecmp(oc->name, metric_names[i]) &&
                                            !oc->collect_stat) {
                                                found = true;
                                                break;
                                        }
                                }
                        }
                        if (!found) {
                                /* Search ignoring groups */
                                oc = perf_stat__find_event(evsel_list, metric_names[i]);
                        }
                        if (!oc) {
                                /* Deduping one is good enough to handle duplicated PMUs. */
                                static char *printed;

                                /*
                                 * Adding events automatically would be difficult, because
                                 * it would risk creating groups that are not schedulable.
                                 * perf stat doesn't understand all the scheduling constraints
                                 * of events. So we ask the user instead to add the missing
                                 * events.
                                 */
                                if (!printed || strcasecmp(printed, metric_names[i])) {
                                        fprintf(stderr,
                                                "Add %s event to groups to get metric expression for %s\n",
                                                metric_names[i],
                                                counter->name);
                                        printed = strdup(metric_names[i]);
                                }
                                invalid = true;
                                continue;
                        }
                        metric_events[i] = oc;
                        oc->collect_stat = true;
                }
                metric_events[i] = NULL;
                free(metric_names);
                if (invalid) {
                        free(metric_events);
                        counter->metric_events = NULL;
                        counter->metric_expr = NULL;
                }
        }
}

static void print_stalled_cycles_frontend(int cpu,
                                          struct perf_evsel *evsel, double avg,
                                          struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        if (ratio)
                out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
                                  ratio);
        else
                out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
                                         struct perf_evsel *evsel, double avg,
                                         struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
                                struct perf_evsel *evsel,
                                double avg,
                                struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_branches_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
                                  struct perf_evsel *evsel,
                                  double avg,
                                  struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance Analysis and Counter Architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example on branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline for each unit of pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *                      TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */

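/*
 * A small worked example of the formulas above, with made-up counts and
 * assuming a 4-wide pipeline:
 *
 *   Cycles = 1000, so TotalSlots = 4000
 *   SlotsIssued = 3000, SlotsRetired = 2400
 *   RecoveryBubbles = 200, FetchBubbles = 800
 *
 *   BadSpeculation = ((3000 - 2400) + 200) / 4000 = 0.20
 *   Retiring       = 2400 / 4000                  = 0.60
 *   FrontendBound  =  800 / 4000                  = 0.20
 *   BackendBound   = 1.0 - 0.20 - 0.60 - 0.20     = 0.00
 */

/*
 * Clamp small negative values (down to -0.02), which the difference
 * formulas above can produce from measurement noise, to zero.
 */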
static double sanitize_val(double x)
{
        if (x < 0 && x >= -0.02)
                return 0.0;
        return x;
}

static double td_total_slots(int ctx, int cpu)
{
        return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
}

static double td_bad_spec(int ctx, int cpu)
{
        double bad_spec = 0;
        double total_slots;
        double total;

        total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
                avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
                avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
        total_slots = td_total_slots(ctx, cpu);
        if (total_slots)
                bad_spec = total / total_slots;
        return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu)
{
        double retiring = 0;
        double total_slots = td_total_slots(ctx, cpu);
        double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);

        if (total_slots)
                retiring = ret_slots / total_slots;
        return retiring;
}

static double td_fe_bound(int ctx, int cpu)
{
        double fe_bound = 0;
        double total_slots = td_total_slots(ctx, cpu);
        double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);

        if (total_slots)
                fe_bound = fetch_bub / total_slots;
        return fe_bound;
}

static double td_be_bound(int ctx, int cpu)
{
        double sum = (td_fe_bound(ctx, cpu) +
                      td_bad_spec(ctx, cpu) +
                      td_retiring(ctx, cpu));
        if (sum == 0)
                return 0;
        return sanitize_val(1.0 - sum);
}

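/*
 * With freeze_on_smi set (as perf stat --smi-cost arranges), the cycles
 * counter stops while the CPU is in System Management Mode but APERF keeps
 * running, so (aperf - cycles) / aperf approximates the fraction of cycles
 * lost to SMIs.
 */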
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
                           struct perf_stat_output_ctx *out)
{
        double smi_num, aperf, cycles, cost = 0.0;
        int ctx = evsel_context(evsel);
        const char *color = NULL;

        smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
        aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
        cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if ((cycles == 0) || (aperf == 0))
                return;

        if (smi_num)
                cost = (aperf - cycles) / aperf * 100.00;

        if (cost > 10)
                color = PERF_COLOR_RED;
        out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
        out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out)
{
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        const char *color = NULL;
        int ctx = evsel_context(evsel);

        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total) {
                        ratio = avg / total;
                        print_metric(ctxp, NULL, "%7.2f ",
                                        "insn per cycle", ratio);
                } else {
                        print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
                }
                total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

                if (total && avg) {
                        out->new_line(ctxp);
                        ratio = total / avg;
                        print_metric(ctxp, NULL, "%7.2f ",
                                        "stalled cycles per insn",
                                        ratio);
                } else if (have_frontend_stalled) {
                        print_metric(ctxp, NULL, NULL,
                                     "stalled cycles per insn", 0);
                }
        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
                if (runtime_branches_stats[ctx][cpu].n != 0)
                        print_branch_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_L1D |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
                        print_l1_dcache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_L1I |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_l1_icache_stats[ctx][cpu].n != 0)
                        print_l1_icache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_DTLB |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
                        print_dtlb_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_ITLB |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
                        print_itlb_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_LL |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_ll_cache_stats[ctx][cpu].n != 0)
                        print_ll_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

                if (total)
                        ratio = avg * 100 / total;

                if (runtime_cacherefs_stats[ctx][cpu].n != 0)
                        print_metric(ctxp, NULL, "%8.3f %%",
                                     "of all cache refs", ratio);
                else
                        print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total) {
                        ratio = avg / total;
                        print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
                        print_metric(ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total)
                        print_metric(ctxp, NULL,
                                        "%7.2f%%", "transactional cycles",
                                        100.0 * (avg / total));
                else
                        print_metric(ctxp, NULL, NULL, "transactional cycles",
                                     0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
                if (total2 < avg)
                        total2 = avg;
                if (total)
                        print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2 - avg) / total));
                else
                        print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
        } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

                if (avg)
                        ratio = total / avg;

                if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
                        print_metric(ctxp, NULL, "%8.0f",
                                     "cycles / transaction", ratio);
                else
                        print_metric(ctxp, NULL, NULL, "cycles / transaction",
                                     0);
        } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

                if (avg)
                        ratio = total / avg;

                print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
                   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
                        print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
                                     avg / ratio);
                else
                        print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
                double fe_bound = td_fe_bound(ctx, cpu);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
                double retiring = td_retiring(ctx, cpu);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
                double bad_spec = td_bad_spec(ctx, cpu);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
                double be_bound = td_be_bound(ctx, cpu);
                const char *name = "backend bound";
                static int have_recovery_bubbles = -1;

                /* In case the CPU does not support topdown-recovery-bubbles */
                if (have_recovery_bubbles < 0)
                        have_recovery_bubbles = pmu_have_event("cpu",
                                        "topdown-recovery-bubbles");
                if (!have_recovery_bubbles)
                        name = "backend bound/bad spec";

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                if (td_total_slots(ctx, cpu) > 0)
                        print_metric(ctxp, color, "%8.1f%%", name,
                                        be_bound * 100.);
                else
                        print_metric(ctxp, NULL, NULL, name, 0);
        } else if (evsel->metric_expr) {
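                /*
                 * Generic MetricExpr handling: seed the expression parser
                 * with this event's average and the saved averages of every
                 * event the expression references, then evaluate it.
                 */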
                struct parse_ctx pctx;
                int i;

                expr__ctx_init(&pctx);
                expr__add_id(&pctx, evsel->name, avg);
                for (i = 0; evsel->metric_events[i]; i++) {
                        struct saved_value *v;

                        v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
                        if (!v)
                                break;
                        expr__add_id(&pctx, evsel->metric_events[i]->name,
                                             avg_stats(&v->stats));
                }
                if (!evsel->metric_events[i]) {
                        const char *p = evsel->metric_expr;

                        if (expr__parse(&ratio, &pctx, &p) == 0)
                                print_metric(ctxp, NULL, "%8.1f",
                                        evsel->metric_name ?
                                        evsel->metric_name :
                                        out->force_header ? evsel->name : "",
                                        ratio);
                        else
                                print_metric(ctxp, NULL, NULL, "", 0);
                } else
                        print_metric(ctxp, NULL, NULL, "", 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
                char unit_buf[10];

                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }
                snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(cpu, evsel, out);
        } else {
                print_metric(ctxp, NULL, NULL, NULL, 0);
        }
}