GNU Linux-libre 6.7.9-gnu
[releases.git] arch/x86/events/amd/lbr.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK         0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branch is not recorded
 */
#define LBR_SELECT_KERNEL               0       /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER                 1       /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC                  2       /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL        3       /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND        4       /* Near indirect calls */
#define LBR_SELECT_RET_NEAR             5       /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND         6       /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL         7       /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH           8       /* Far branches */

#define LBR_KERNEL      BIT(LBR_SELECT_KERNEL)
#define LBR_USER        BIT(LBR_SELECT_USER)
#define LBR_JCC         BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL    BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL    BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN      BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP     BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP     BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR         BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP    -1      /* unsupported filter */
#define LBR_IGNORE      0

#define LBR_ANY         \
        (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |   \
         LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

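/*
 * Layout of a raw LBR record as read from an LBR From/To MSR pair.
 * The branch addresses are stored as 58-bit fields with sign-extension
 * bits above them; the To register additionally carries the valid and
 * speculation status of the record.
 */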
struct branch_entry {
        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:5;
                        u64     mispredict:1;
                } split;
                u64             full;
        } from;

        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:3;
                        u64     reserved:1;
                        u64     spec:1;
                        u64     valid:1;
                } split;
                u64             full;
        } to;
};

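/*
 * The LBR From/To MSRs are interleaved: the pair for entry 'idx' lives
 * at MSR_AMD_SAMP_BR_FROM + idx * 2 (From) and idx * 2 + 1 (To).
 */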
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

        return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

        return val;
}

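/*
 * Sign-extend a recorded branch address from the CPU's virtual address
 * width to a canonical 64-bit value
 */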
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        return (u64)(((s64)ip << shift) >> shift);
}

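/*
 * Apply the software branch filter: classify each recorded branch,
 * invalidate entries whose type does not match the requested
 * branch_sample_type mask and compact the stack so that only the
 * remaining valid records are reported.
 */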
static void amd_pmu_lbr_filter(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        bool fused_only = false;
        u64 from, to;

        /* If sampling all branches, there is nothing to filter */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                fused_only = true;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = branch_type_fused(from, to, 0, &offset);

                /*
                 * Adjust the branch from address in case of instruction
                 * fusion where it points to an instruction preceding the
                 * actual branch
                 */
                if (offset) {
                        cpuc->lbr_entries[i].from += offset;
                        if (fused_only)
                                continue;
                }

                /* If type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0;  /* mark invalid */
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /* Remove all invalid entries */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}

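/*
 * Maps the (valid << 1 | spec) bit pair of a branch record to the
 * corresponding PERF_BR_SPEC_* value
 */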
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
        PERF_BR_SPEC_NA,
        PERF_BR_SPEC_WRONG_PATH,
        PERF_BR_NON_SPEC_CORRECT_PATH,
        PERF_BR_SPEC_CORRECT_PATH,
};

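/*
 * Read all hardware LBR records into cpuc->lbr_entries, converting them
 * to the generic perf_branch_entry format, and then apply the software
 * branch filter.
 */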
void amd_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_branch_entry *br = cpuc->lbr_entries;
        struct branch_entry entry;
        int out = 0, idx, i;

        if (!cpuc->lbr_users)
                return;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                entry.from.full = amd_pmu_lbr_get_from(i);
                entry.to.full   = amd_pmu_lbr_get_to(i);

                /*
                 * Check if a branch has been logged; if valid = 0, spec = 0
                 * then no branch was recorded
                 */
                if (!entry.to.split.valid && !entry.to.split.spec)
                        continue;

                perf_clear_branch_entry_bitfields(br + out);

                br[out].from    = sign_ext_branch_ip(entry.from.split.ip);
                br[out].to      = sign_ext_branch_ip(entry.to.split.ip);
                br[out].mispred = entry.from.split.mispredict;
                br[out].predicted = !br[out].mispred;

                /*
                 * Set branch speculation information using the status of
                 * the valid and spec bits.
                 *
                 * When valid = 0, spec = 0, no branch was recorded and the
                 * entry is discarded as seen above.
                 *
                 * When valid = 0, spec = 1, the recorded branch was
                 * speculative but took the wrong path.
                 *
                 * When valid = 1, spec = 0, the recorded branch was
                 * non-speculative but took the correct path.
                 *
                 * When valid = 1, spec = 1, the recorded branch was
                 * speculative and took the correct path
                 */
                idx = (entry.to.split.valid << 1) | entry.to.split.spec;
                br[out].spec = lbr_spec_map[idx];
                out++;
        }

        cpuc->lbr_stack.nr = out;

        /*
         * Internal register renaming always ensures that LBR From[0] and
         * LBR To[0] represent the TOS
         */
        cpuc->lbr_stack.hw_idx = 0;

        /* Perform further software filtering */
        amd_pmu_lbr_filter();
}

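/*
 * Translation of PERF_SAMPLE_BRANCH_* request bits into LBR_SELECT
 * filter bits; types marked LBR_NOT_SUPP cannot be filtered in hardware
 * and are rejected by amd_pmu_lbr_setup_filter().
 */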
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGNORE,

        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IN_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,

        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,

        [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]    = LBR_NOT_SUPP,
};

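/*
 * Build both the software filter mask (X86_BR_*) and the hardware
 * LBR_SELECT value for the event's branch_sample_type. The hardware
 * filter bits operate in suppress mode, hence the final XOR with
 * LBR_SELECT_MASK.
 */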
static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        /* No LBR support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* Ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        reg->reg = mask;
        mask = 0;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & BIT_ULL(i)))
                        continue;

                v = lbr_select_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGNORE)
                        mask |= v;
        }

        /* Filter bits operate in suppress mode */
        reg->config = mask ^ LBR_SELECT_MASK;

        return 0;
}

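/* Validate and configure branch stack sampling for an event */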
int amd_pmu_lbr_hw_config(struct perf_event *event)
{
        int ret = 0;

        /* LBR is not recommended in counting mode */
        if (!is_sampling_event(event))
                return -EINVAL;

        ret = amd_pmu_lbr_setup_filter(event);
        if (!ret)
                event->attach_state |= PERF_ATTACH_SCHED_CB;

        return ret;
}

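/* Wipe all LBR records and clear the hardware branch filter */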
void amd_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int i;

        if (!x86_pmu.lbr_nr)
                return;

        /* Reset all branch records individually */
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                amd_pmu_lbr_set_from(i, 0);
                amd_pmu_lbr_set_to(i, 0);
        }

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}

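/*
 * Install the event's branch filter settings, register for context
 * switch callbacks and wipe stale LBR records when the first user
 * is added.
 */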
void amd_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event)) {
                cpuc->lbr_select = 1;
                cpuc->lbr_sel->config = reg->config;
                cpuc->br_sel = reg->reg;
        }

        perf_sched_cb_inc(event->pmu);

        if (!cpuc->lbr_users++ && !event->total_time_running)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event))
                cpuc->lbr_select = 0;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * A context switch can flip the address space and LBR entries are
         * not tagged with an identifier. Hence, branches cannot be resolved
         * from the old address space and the LBR records should be wiped.
         */
        if (cpuc->lbr_users && sched_in)
                amd_pmu_lbr_reset();
}

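/*
 * Program the hardware branch filter, freeze LBRs on PMI and enable
 * LBR recording.
 */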
void amd_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 lbr_select, dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        /* Set hardware branch filter */
        if (cpuc->lbr_select) {
                lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
                wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
        }

        rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);

        wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);

        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
}

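/*
 * Detect LBR v2 support and read the LBR stack depth from CPUID leaf
 * 0x80000022 (EXT_PERFMON_DEBUG_FEATURES).
 */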
__init int amd_pmu_lbr_init(void)
{
        union cpuid_0x80000022_ebx ebx;

        if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
                return -EOPNOTSUPP;

        /* Set number of entries */
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
        x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

        pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

        return 0;
}