GNU Linux-libre 5.19-rc6-gnu
arch/x86/events/zhaoxin/core.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"

/*
 * Zhaoxin PerfMon, used on zxc and later.
 */
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

        [PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
        [PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
        [PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
        [PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
        [PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};
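
/*
 * Each map entry uses the raw config encoding exported via the format
 * attributes below: event select in bits 7:0, unit mask in bits 15:8.
 * For example, PERF_COUNT_HW_CACHE_REFERENCES (0x0515) is event select
 * 0x15 with umask 0x05.
 */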

static struct event_constraint zxc_event_constraints[] __read_mostly = {

        FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
        EVENT_CONSTRAINT_END
};

static struct event_constraint zxd_event_constraints[] __read_mostly = {

        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
        FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
        FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
        EVENT_CONSTRAINT_END
};

static __initconst const u64 zxd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0042,
                [C(RESULT_MISS)] = 0x0538,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0043,
                [C(RESULT_MISS)] = 0x0562,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0300,
                [C(RESULT_MISS)] = 0x0301,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x030a,
                [C(RESULT_MISS)] = 0x030b,
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0042,
                [C(RESULT_MISS)] = 0x052c,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0043,
                [C(RESULT_MISS)] = 0x0530,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0564,
                [C(RESULT_MISS)] = 0x0565,
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x00c0,
                [C(RESULT_MISS)] = 0x0534,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0700,
                [C(RESULT_MISS)] = 0x0709,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(NODE)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
};

static __initconst const u64 zxe_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0568,
                [C(RESULT_MISS)] = 0x054b,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0669,
                [C(RESULT_MISS)] = 0x0562,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0300,
                [C(RESULT_MISS)] = 0x0301,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x030a,
                [C(RESULT_MISS)] = 0x030b,
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0568,
                [C(RESULT_MISS)] = 0x052c,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0669,
                [C(RESULT_MISS)] = 0x0530,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0564,
                [C(RESULT_MISS)] = 0x0565,
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x00c0,
                [C(RESULT_MISS)] = 0x0534,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0028,
                [C(RESULT_MISS)] = 0x0029,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(NODE)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
};
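
/*
 * Note on the table values (per the generic x86 perf code that consumes
 * them): an entry of 0 means the event is not supported on this CPU
 * (-ENOENT), while -1 marks an op/result combination that is never
 * valid (-EINVAL). The ZXE LL entries above are therefore present but
 * unsupported.
 */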

static void zhaoxin_pmu_disable_all(void)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void zhaoxin_pmu_enable_all(int added)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static inline u64 zhaoxin_pmu_get_status(void)
{
        u64 status;

        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

        return status;
}

static inline void zhaoxin_pmu_ack_status(u64 ack)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static inline void zxc_pmu_ack_status(u64 ack)
{
        /*
         * ZXC needs global control enabled in order to clear status bits.
         */
        zhaoxin_pmu_enable_all(0);
        zhaoxin_pmu_ack_status(ack);
        zhaoxin_pmu_disable_all();
}

static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
{
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
        u64 ctrl_val, mask;

        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_disable_event(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                zhaoxin_pmu_disable_fixed(hwc);
                return;
        }

        x86_pmu_disable_event(event);
}

static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
{
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;

        /*
         * Enable IRQ generation (0x8),
         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
         * if requested:
         */
        bits = 0x8ULL;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;

        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
        wrmsrl(hwc->config_base, ctrl_val);
}
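
/*
 * Worked example (illustrative): for fixed counter 1 with both USR and
 * OS counting requested, bits becomes 0xb and is shifted to 0xb0, so
 * the read-modify-write above clears nibble 1 of the fixed counter
 * control MSR and sets PMI|USR|OS for that counter.
 */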

static void zhaoxin_pmu_enable_event(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                zhaoxin_pmu_enable_fixed(hwc);
                return;
        }

        __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
{
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        int handled = 0;
        u64 status;
        int bit;

        cpuc = this_cpu_ptr(&cpu_hw_events);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        zhaoxin_pmu_disable_all();
        status = zhaoxin_pmu_get_status();
        if (!status)
                goto done;

again:
        if (x86_pmu.enabled_ack)
                zxc_pmu_ack_status(status);
        else
                zhaoxin_pmu_ack_status(status);

        inc_irq_stat(apic_perf_irqs);

        /*
         * CondChgd (bit 63) doesn't indicate any counter overflow;
         * ignore and clear the bit.
         */
        if (__test_and_clear_bit(63, (unsigned long *)&status)) {
                if (!status)
                        goto done;
        }

        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];

                handled++;

                if (!test_bit(bit, cpuc->active_mask))
                        continue;

                x86_perf_event_update(event);
                perf_sample_data_init(&data, 0, event->hw.last_period);

                if (!x86_perf_event_set_period(event))
                        continue;

                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }

        /*
         * Repeat if there is more work to be done:
         */
        status = zhaoxin_pmu_get_status();
        if (status)
                goto again;

done:
        zhaoxin_pmu_enable_all(0);
        return handled;
}
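
/*
 * Summary of the flow above: counters stay globally disabled for the
 * duration of the NMI, the overflow bits in GLOBAL_STATUS are
 * acknowledged (via the ZXC workaround when enabled_ack is set), each
 * overflowed counter is updated and re-armed, and the loop repeats
 * until the status reads back zero.
 */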

static u64 zhaoxin_pmu_event_map(int hw_event)
{
        return zx_pmon_event_map[hw_event];
}

static struct event_constraint *
zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                        struct perf_event *event)
{
        struct event_constraint *c;

        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &unconstrained;
}

PMU_FORMAT_ATTR(event,  "config:0-7");
PMU_FORMAT_ATTR(umask,  "config:8-15");
PMU_FORMAT_ATTR(edge,   "config:18");
PMU_FORMAT_ATTR(inv,    "config:23");
PMU_FORMAT_ATTR(cmask,  "config:24-31");

static struct attribute *zx_arch_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask.attr,
        NULL,
};
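
/*
 * The format attributes above let userspace program raw events, e.g.
 * (illustrative):
 *
 *      perf stat -e cpu/event=0x82,umask=0x00/ -a sleep 1
 *
 * which selects the unhalted core clock cycles event (0x0082) from the
 * map at the top of this file.
 */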

static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
{
        u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

        return x86_event_sysfs_show(page, config, event);
}

static const struct x86_pmu zhaoxin_pmu __initconst = {
        .name                   = "zhaoxin",
        .handle_irq             = zhaoxin_pmu_handle_irq,
        .disable_all            = zhaoxin_pmu_disable_all,
        .enable_all             = zhaoxin_pmu_enable_all,
        .enable                 = zhaoxin_pmu_enable_event,
        .disable                = zhaoxin_pmu_disable_event,
        .hw_config              = x86_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = zhaoxin_pmu_event_map,
        .max_events             = ARRAY_SIZE(zx_pmon_event_map),
        .apic                   = 1,
        /*
         * For zxd/zxe, reads and writes of the PMCx MSRs are 48 bits wide.
         */
        .max_period             = (1ULL << 47) - 1,
        .get_event_constraints  = zhaoxin_get_event_constraints,

        .format_attrs           = zx_arch_formats_attr,
        .events_sysfs_show      = zhaoxin_event_sysfs_show,
};

static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
        { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
        { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
        { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
        { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
        { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
        { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
        { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void zhaoxin_arch_events_quirk(void)
{
        int bit;

        /* Disable events that CPUID reports as not present. */
        for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
                zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
                pr_warn("CPUID marked event: '%s' unavailable\n",
                        zx_arch_events_map[bit].name);
        }
}

__init int zhaoxin_pmu_init(void)
{
        union cpuid10_edx edx;
        union cpuid10_eax eax;
        union cpuid10_ebx ebx;
        struct event_constraint *c;
        unsigned int unused;
        int version;

        pr_info("Welcome to zhaoxin pmu!\n");

        /*
         * Check whether the Architectural PerfMon supports
         * hw_event.
         */
        cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);

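        /*
         * CPUID leaf 0xa layout, as consumed here: EAX holds version_id
         * (bits 7:0), num_counters (15:8), bit_width (23:16) and
         * mask_length (31:24); EBX is a bit vector of architectural
         * events that are NOT available; EDX holds num_counters_fixed
         * (bits 4:0).
         */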
        if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
                return -ENODEV;

        version = eax.split.version_id;
        if (version != 2)
                return -ENODEV;

        x86_pmu = zhaoxin_pmu;
        pr_info("Version check pass!\n");

        x86_pmu.version                 = version;
        x86_pmu.num_counters            = eax.split.num_counters;
        x86_pmu.cntval_bits             = eax.split.bit_width;
        x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
        x86_pmu.events_maskl            = ebx.full;
        x86_pmu.events_mask_len         = eax.split.mask_length;

        x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
        x86_add_quirk(zhaoxin_arch_events_quirk);

        switch (boot_cpu_data.x86) {
        case 0x06:
                if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {

                        x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

                        /* Clearing status works only if the global control is enabled on zxc. */
                        x86_pmu.enabled_ack = 1;

                        x86_pmu.event_constraints = zxc_event_constraints;
                        zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;

                        pr_cont("ZXC events, ");
                        break;
                }
                return -ENODEV;

        case 0x07:
                zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
                        X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);

                zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
                        X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);

                switch (boot_cpu_data.x86_model) {
                case 0x1b:
                        memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
                               sizeof(hw_cache_event_ids));

                        x86_pmu.event_constraints = zxd_event_constraints;

                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;

                        pr_cont("ZXD events, ");
                        break;
                case 0x3b:
                        memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
                               sizeof(hw_cache_event_ids));

                        x86_pmu.event_constraints = zxd_event_constraints;

                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;

                        pr_cont("ZXE events, ");
                        break;
                default:
                        return -ENODEV;
                }
                break;

        default:
                return -ENODEV;
        }

        x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
        x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
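
        /*
         * Worked example (illustrative): with 4 GP counters and 3 fixed
         * counters, intel_ctrl becomes 0xf | (0x7ULL << 32) = 0x70000000f,
         * the global enable mask written by zhaoxin_pmu_enable_all().
         */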

        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
                        c->weight += x86_pmu.num_counters;
                }
        }

        return 0;
}