GNU Linux-libre 6.7.9-gnu
arch/x86/events/amd/uncore.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Advanced Micro Devices, Inc.
4  *
5  * Author: Jacob Shin <jacob.shin@amd.com>
6  */
7
8 #include <linux/perf_event.h>
9 #include <linux/percpu.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/init.h>
13 #include <linux/cpu.h>
14 #include <linux/cpumask.h>
15 #include <linux/cpufeature.h>
16 #include <linux/smp.h>
17
18 #include <asm/perf_event.h>
19 #include <asm/msr.h>
20
21 #define NUM_COUNTERS_NB         4
22 #define NUM_COUNTERS_L2         4
23 #define NUM_COUNTERS_L3         6
24
25 #define RDPMC_BASE_NB           6
26 #define RDPMC_BASE_LLC          10
27
28 #define COUNTER_SHIFT           16
29 #define UNCORE_NAME_LEN         16
30 #define UNCORE_GROUP_MAX        256
31
32 #undef pr_fmt
33 #define pr_fmt(fmt)     "amd_uncore: " fmt
34
35 static int pmu_version;
36
37 struct amd_uncore_ctx {
38         int refcnt;
39         int cpu;
40         struct perf_event **events;
41         struct hlist_node node;
42 };
43
44 struct amd_uncore_pmu {
45         char name[UNCORE_NAME_LEN];
46         int num_counters;
47         int rdpmc_base;
48         u32 msr_base;
49         int group;
50         cpumask_t active_mask;
51         struct pmu pmu;
52         struct amd_uncore_ctx * __percpu *ctx;
53 };
54
55 enum {
56         UNCORE_TYPE_DF,
57         UNCORE_TYPE_L3,
58         UNCORE_TYPE_UMC,
59
60         UNCORE_TYPE_MAX
61 };
62
63 union amd_uncore_info {
64         struct {
65                 u64     aux_data:32;    /* auxiliary data */
66                 u64     num_pmcs:8;     /* number of counters */
67                 u64     gid:8;          /* group id */
68                 u64     cid:8;          /* context id */
69         } split;
70         u64             full;
71 };
72
73 struct amd_uncore {
74         union amd_uncore_info * __percpu info;
75         struct amd_uncore_pmu *pmus;
76         unsigned int num_pmus;
77         bool init_done;
78         void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
79         int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
80         void (*move)(struct amd_uncore *uncore, unsigned int cpu);
81         void (*free)(struct amd_uncore *uncore, unsigned int cpu);
82 };
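/*
 * Each uncore type supplies four callbacks which the CPU hotplug callbacks
 * further below drive in order: scan() runs at the STARTING stage and
 * records per-CPU topology info, init() runs at the ONLINE stage and sets
 * up (or shares) a counter context, move() migrates active events away
 * from a CPU that is about to go down, and free() releases the context
 * once the CPU is dead.
 */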
83
84 static struct amd_uncore uncores[UNCORE_TYPE_MAX];
85
86 static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
87 {
88         return container_of(event->pmu, struct amd_uncore_pmu, pmu);
89 }
90
91 static void amd_uncore_read(struct perf_event *event)
92 {
93         struct hw_perf_event *hwc = &event->hw;
94         u64 prev, new;
95         s64 delta;
96
97          /*
98           * Since we do not enable counter overflow interrupts, we do not
99           * have to worry about prev_count changing on us.
100          */
101
102         prev = local64_read(&hwc->prev_count);
103
104         /*
105          * Some uncore PMUs do not have RDPMC assignments. In such cases,
106          * read counts directly from the corresponding PERF_CTR.
107          */
108         if (hwc->event_base_rdpmc < 0)
109                 rdmsrl(hwc->event_base, new);
110         else
111                 rdpmcl(hwc->event_base_rdpmc, new);
112
113         local64_set(&hwc->prev_count, new);
114         delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
115         delta >>= COUNTER_SHIFT;
116         local64_add(delta, &event->count);
117 }
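
/*
 * The programmable counters are effectively 48 bits wide here
 * (64 - COUNTER_SHIFT), so a plain subtraction of raw readings would go
 * wrong once the counter wraps. Shifting both values up by COUNTER_SHIFT
 * puts the significant bits at the top of the u64 so the subtraction
 * wraps naturally, and the arithmetic right shift scales the signed delta
 * back down. Worked example across a wrap, with prev = 0xffffffffffff
 * and new = 0x5:
 *
 *   delta = ((0x5 << 16) - (0xffffffffffff << 16)) >> 16 = 6
 *
 * i.e. six increments, which is exactly what happened.
 */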
118
119 static void amd_uncore_start(struct perf_event *event, int flags)
120 {
121         struct hw_perf_event *hwc = &event->hw;
122
123         if (flags & PERF_EF_RELOAD)
124                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
125
126         hwc->state = 0;
127         wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
128         perf_event_update_userpage(event);
129 }
130
131 static void amd_uncore_stop(struct perf_event *event, int flags)
132 {
133         struct hw_perf_event *hwc = &event->hw;
134
135         wrmsrl(hwc->config_base, hwc->config);
136         hwc->state |= PERF_HES_STOPPED;
137
138         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
139                 event->pmu->read(event);
140                 hwc->state |= PERF_HES_UPTODATE;
141         }
142 }
143
144 static int amd_uncore_add(struct perf_event *event, int flags)
145 {
146         int i;
147         struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
148         struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
149         struct hw_perf_event *hwc = &event->hw;
150
151         /* are we already assigned? */
152         if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
153                 goto out;
154
155         for (i = 0; i < pmu->num_counters; i++) {
156                 if (ctx->events[i] == event) {
157                         hwc->idx = i;
158                         goto out;
159                 }
160         }
161
162         /* if not, take the first available counter */
163         hwc->idx = -1;
164         for (i = 0; i < pmu->num_counters; i++) {
165                 if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
166                         hwc->idx = i;
167                         break;
168                 }
169         }
170
171 out:
172         if (hwc->idx == -1)
173                 return -EBUSY;
174
175         hwc->config_base = pmu->msr_base + (2 * hwc->idx);
176         hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
177         hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
178         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
179
180         if (pmu->rdpmc_base < 0)
181                 hwc->event_base_rdpmc = -1;
182
183         if (flags & PERF_EF_START)
184                 event->pmu->start(event, PERF_EF_RELOAD);
185
186         return 0;
187 }
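
/*
 * Counter slots are claimed with cmpxchg() on the shared ctx->events[]
 * array, so concurrent add() calls against the same context can never end
 * up on the same hardware counter. A negative rdpmc_base (used by the UMC
 * PMUs below) disables the RDPMC fast path, in which case
 * amd_uncore_read() falls back to reading the PERF_CTR MSR directly.
 */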
188
189 static void amd_uncore_del(struct perf_event *event, int flags)
190 {
191         int i;
192         struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
193         struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
194         struct hw_perf_event *hwc = &event->hw;
195
196         event->pmu->stop(event, PERF_EF_UPDATE);
197
198         for (i = 0; i < pmu->num_counters; i++) {
199                 if (cmpxchg(&ctx->events[i], event, NULL) == event)
200                         break;
201         }
202
203         hwc->idx = -1;
204 }
205
206 static int amd_uncore_event_init(struct perf_event *event)
207 {
208         struct amd_uncore_pmu *pmu;
209         struct amd_uncore_ctx *ctx;
210         struct hw_perf_event *hwc = &event->hw;
211
212         if (event->attr.type != event->pmu->type)
213                 return -ENOENT;
214
215         if (event->cpu < 0)
216                 return -EINVAL;
217
218         pmu = event_to_amd_uncore_pmu(event);
219         ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
220         if (!ctx)
221                 return -ENODEV;
222
223         /*
224          * NB and Last level cache counters (MSRs) are shared across all cores
225          * that share the same NB / Last level cache. On family 16h and below,
226          * interrupts can be directed to a single target core, but event counts
227          * generated by processes running on other cores cannot be masked out.
228          * So we do not support sampling and per-thread events via
229          * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts.
230          */
231         hwc->config = event->attr.config;
232         hwc->idx = -1;
233
234         /*
235          * Since requests can come in on any of the shared cores, remap the
236          * event to a single common CPU.
237          */
238         event->cpu = ctx->cpu;
239
240         return 0;
241 }
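
/*
 * Since events are remapped to one owning CPU per shared domain and the
 * PMUs register with task_ctx_nr = perf_invalid_context, these counters
 * are only usable for system-wide or per-CPU counting. A typical
 * invocation would look something like:
 *
 *   perf stat -e amd_df/event=0x1f,umask=0x0/ -a -- sleep 1
 *
 * where the event/umask encoding is purely a placeholder; the real DF
 * event numbers come from the relevant PPR.
 */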
242
243 static umode_t
244 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
245 {
246         return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
247                attr->mode : 0;
248 }
249
250 static umode_t
251 amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
252 {
253         return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
254 }
255
256 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
257                                             struct device_attribute *attr,
258                                             char *buf)
259 {
260         struct pmu *ptr = dev_get_drvdata(dev);
261         struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);
262
263         return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
264 }
265 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
266
267 static struct attribute *amd_uncore_attrs[] = {
268         &dev_attr_cpumask.attr,
269         NULL,
270 };
271
272 static struct attribute_group amd_uncore_attr_group = {
273         .attrs = amd_uncore_attrs,
274 };
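
/*
 * The cpumask attribute exposes pmu->active_mask as
 * /sys/bus/event_source/devices/<pmu>/cpumask, which is what tools such
 * as perf consult to pick the single owner CPU of each domain when
 * opening counting events.
 */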
275
276 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                 \
277 static ssize_t __uncore_##_var##_show(struct device *dev,               \
278                                 struct device_attribute *attr,          \
279                                 char *page)                             \
280 {                                                                       \
281         BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
282         return sprintf(page, _format "\n");                             \
283 }                                                                       \
284 static struct device_attribute format_attr_##_var =                     \
285         __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
286
287 DEFINE_UNCORE_FORMAT_ATTR(event12,      event,          "config:0-7,32-35");
288 DEFINE_UNCORE_FORMAT_ATTR(event14,      event,          "config:0-7,32-35,59-60"); /* F17h+ DF */
289 DEFINE_UNCORE_FORMAT_ATTR(event14v2,    event,          "config:0-7,32-37");       /* PerfMonV2 DF */
290 DEFINE_UNCORE_FORMAT_ATTR(event8,       event,          "config:0-7");             /* F17h+ L3, PerfMonV2 UMC */
291 DEFINE_UNCORE_FORMAT_ATTR(umask8,       umask,          "config:8-15");
292 DEFINE_UNCORE_FORMAT_ATTR(umask12,      umask,          "config:8-15,24-27");      /* PerfMonV2 DF */
293 DEFINE_UNCORE_FORMAT_ATTR(coreid,       coreid,         "config:42-44");           /* F19h L3 */
294 DEFINE_UNCORE_FORMAT_ATTR(slicemask,    slicemask,      "config:48-51");           /* F17h L3 */
295 DEFINE_UNCORE_FORMAT_ATTR(threadmask8,  threadmask,     "config:56-63");           /* F17h L3 */
296 DEFINE_UNCORE_FORMAT_ATTR(threadmask2,  threadmask,     "config:56-57");           /* F19h L3 */
297 DEFINE_UNCORE_FORMAT_ATTR(enallslices,  enallslices,    "config:46");              /* F19h L3 */
298 DEFINE_UNCORE_FORMAT_ATTR(enallcores,   enallcores,     "config:47");              /* F19h L3 */
299 DEFINE_UNCORE_FORMAT_ATTR(sliceid,      sliceid,        "config:48-50");           /* F19h L3 */
300 DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,     rdwrmask,       "config:8-9");             /* PerfMonV2 UMC */
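
/*
 * Each DEFINE_UNCORE_FORMAT_ATTR() instance becomes a read-only file under
 * /sys/bus/event_source/devices/<pmu>/format/ describing which config bits
 * the field occupies. On a Family 19h part, for instance, the L3 PMU ends
 * up exporting "event" as "config:0-7" and "threadmask" as "config:56-57",
 * which is what enables command-line syntax such as
 * amd_l3/event=0xNN,umask=0xNN,threadmask=0x3/ (the 0xNN values being
 * placeholders, not real event encodings).
 */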
301
302 /* Common DF and NB attributes */
303 static struct attribute *amd_uncore_df_format_attr[] = {
304         &format_attr_event12.attr,      /* event */
305         &format_attr_umask8.attr,       /* umask */
306         NULL,
307 };
308
309 /* Common L2 and L3 attributes */
310 static struct attribute *amd_uncore_l3_format_attr[] = {
311         &format_attr_event12.attr,      /* event */
312         &format_attr_umask8.attr,       /* umask */
313         NULL,                           /* threadmask */
314         NULL,
315 };
316
317 /* Common UMC attributes */
318 static struct attribute *amd_uncore_umc_format_attr[] = {
319         &format_attr_event8.attr,       /* event */
320         &format_attr_rdwrmask.attr,     /* rdwrmask */
321         NULL,
322 };
323
324 /* F17h unique L3 attributes */
325 static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
326         &format_attr_slicemask.attr,    /* slicemask */
327         NULL,
328 };
329
330 /* F19h unique L3 attributes */
331 static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
332         &format_attr_coreid.attr,       /* coreid */
333         &format_attr_enallslices.attr,  /* enallslices */
334         &format_attr_enallcores.attr,   /* enallcores */
335         &format_attr_sliceid.attr,      /* sliceid */
336         NULL,
337 };
338
339 static struct attribute_group amd_uncore_df_format_group = {
340         .name = "format",
341         .attrs = amd_uncore_df_format_attr,
342 };
343
344 static struct attribute_group amd_uncore_l3_format_group = {
345         .name = "format",
346         .attrs = amd_uncore_l3_format_attr,
347 };
348
349 static struct attribute_group amd_f17h_uncore_l3_format_group = {
350         .name = "format",
351         .attrs = amd_f17h_uncore_l3_format_attr,
352         .is_visible = amd_f17h_uncore_is_visible,
353 };
354
355 static struct attribute_group amd_f19h_uncore_l3_format_group = {
356         .name = "format",
357         .attrs = amd_f19h_uncore_l3_format_attr,
358         .is_visible = amd_f19h_uncore_is_visible,
359 };
360
361 static struct attribute_group amd_uncore_umc_format_group = {
362         .name = "format",
363         .attrs = amd_uncore_umc_format_attr,
364 };
365
366 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
367         &amd_uncore_attr_group,
368         &amd_uncore_df_format_group,
369         NULL,
370 };
371
372 static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
373         &amd_uncore_attr_group,
374         &amd_uncore_l3_format_group,
375         NULL,
376 };
377
378 static const struct attribute_group *amd_uncore_l3_attr_update[] = {
379         &amd_f17h_uncore_l3_format_group,
380         &amd_f19h_uncore_l3_format_group,
381         NULL,
382 };
383
384 static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
385         &amd_uncore_attr_group,
386         &amd_uncore_umc_format_group,
387         NULL,
388 };
389
390 static __always_inline
391 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
392 {
393         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
394         return info->split.cid;
395 }
396
397 static __always_inline
398 int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
399 {
400         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
401         return info->split.gid;
402 }
403
404 static __always_inline
405 int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
406 {
407         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
408         return info->split.num_pmcs;
409 }
410
411 static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
412 {
413         struct amd_uncore_pmu *pmu;
414         struct amd_uncore_ctx *ctx;
415         int i;
416
417         if (!uncore->init_done)
418                 return;
419
420         for (i = 0; i < uncore->num_pmus; i++) {
421                 pmu = &uncore->pmus[i];
422                 ctx = *per_cpu_ptr(pmu->ctx, cpu);
423                 if (!ctx)
424                         continue;
425
426                 if (cpu == ctx->cpu)
427                         cpumask_clear_cpu(cpu, &pmu->active_mask);
428
429                 if (!--ctx->refcnt) {
430                         kfree(ctx->events);
431                         kfree(ctx);
432                 }
433
434                 *per_cpu_ptr(pmu->ctx, cpu) = NULL;
435         }
436 }
437
438 static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
439 {
440         struct amd_uncore_ctx *curr, *prev;
441         struct amd_uncore_pmu *pmu;
442         int node, cid, gid, i, j;
443
444         if (!uncore->init_done || !uncore->num_pmus)
445                 return 0;
446
447         cid = amd_uncore_ctx_cid(uncore, cpu);
448         gid = amd_uncore_ctx_gid(uncore, cpu);
449
450         for (i = 0; i < uncore->num_pmus; i++) {
451                 pmu = &uncore->pmus[i];
452                 *per_cpu_ptr(pmu->ctx, cpu) = NULL;
453                 curr = NULL;
454
455                 /* Check for group exclusivity */
456                 if (gid != pmu->group)
457                         continue;
458
459                 /* Find a sibling context */
460                 for_each_online_cpu(j) {
461                         if (cpu == j)
462                                 continue;
463
464                         prev = *per_cpu_ptr(pmu->ctx, j);
465                         if (!prev)
466                                 continue;
467
468                         if (cid == amd_uncore_ctx_cid(uncore, j)) {
469                                 curr = prev;
470                                 break;
471                         }
472                 }
473
474                 /* Allocate context if sibling does not exist */
475                 if (!curr) {
476                         node = cpu_to_node(cpu);
477                         curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
478                         if (!curr)
479                                 goto fail;
480
481                         curr->cpu = cpu;
482                         curr->events = kzalloc_node(sizeof(*curr->events) *
483                                                     pmu->num_counters,
484                                                     GFP_KERNEL, node);
485                         if (!curr->events) {
486                                 kfree(curr);
487                                 goto fail;
488                         }
489
490                         cpumask_set_cpu(cpu, &pmu->active_mask);
491                 }
492
493                 curr->refcnt++;
494                 *per_cpu_ptr(pmu->ctx, cpu) = curr;
495         }
496
497         return 0;
498
499 fail:
500         amd_uncore_ctx_free(uncore, cpu);
501
502         return -ENOMEM;
503 }
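
/*
 * Context sharing works by reference counting: the first online CPU with a
 * given context id (cid) in a group allocates the context and becomes its
 * owner (ctx->cpu); later CPUs with a matching cid just bump refcnt and
 * point their per-CPU slot at the same context. amd_uncore_ctx_free()
 * drops the reference and frees the context when the last sharer goes.
 */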
504
505 static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
506 {
507         struct amd_uncore_ctx *curr, *next;
508         struct amd_uncore_pmu *pmu;
509         int i, j;
510
511         if (!uncore->init_done)
512                 return;
513
514         for (i = 0; i < uncore->num_pmus; i++) {
515                 pmu = &uncore->pmus[i];
516                 curr = *per_cpu_ptr(pmu->ctx, cpu);
517                 if (!curr)
518                         continue;
519
520                 /* Migrate to a shared sibling if possible */
521                 for_each_online_cpu(j) {
522                         next = *per_cpu_ptr(pmu->ctx, j);
523                         if (!next || cpu == j)
524                                 continue;
525
526                         if (curr == next) {
527                                 perf_pmu_migrate_context(&pmu->pmu, cpu, j);
528                                 cpumask_clear_cpu(cpu, &pmu->active_mask);
529                                 cpumask_set_cpu(j, &pmu->active_mask);
530                                 next->cpu = j;
531                                 break;
532                         }
533                 }
534         }
535 }
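
/*
 * When the owning CPU of a context goes down, its active events are handed
 * over via perf_pmu_migrate_context() to any other online CPU that shares
 * the same context, and that CPU becomes the new owner advertised through
 * the cpumask attribute.
 */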
536
537 static int amd_uncore_cpu_starting(unsigned int cpu)
538 {
539         struct amd_uncore *uncore;
540         int i;
541
542         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
543                 uncore = &uncores[i];
544                 uncore->scan(uncore, cpu);
545         }
546
547         return 0;
548 }
549
550 static int amd_uncore_cpu_online(unsigned int cpu)
551 {
552         struct amd_uncore *uncore;
553         int i;
554
555         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
556                 uncore = &uncores[i];
557                 if (uncore->init(uncore, cpu))
558                         break;
559         }
560
561         return 0;
562 }
563
564 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
565 {
566         struct amd_uncore *uncore;
567         int i;
568
569         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
570                 uncore = &uncores[i];
571                 uncore->move(uncore, cpu);
572         }
573
574         return 0;
575 }
576
577 static int amd_uncore_cpu_dead(unsigned int cpu)
578 {
579         struct amd_uncore *uncore;
580         int i;
581
582         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
583                 uncore = &uncores[i];
584                 uncore->free(uncore, cpu);
585         }
586
587         return 0;
588 }
589
590 static int amd_uncore_df_event_init(struct perf_event *event)
591 {
592         struct hw_perf_event *hwc = &event->hw;
593         int ret = amd_uncore_event_init(event);
594
595         if (ret || pmu_version < 2)
596                 return ret;
597
598         hwc->config = event->attr.config &
599                       (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
600                                           AMD64_RAW_EVENT_MASK_NB);
601
602         return 0;
603 }
604
605 static int amd_uncore_df_add(struct perf_event *event, int flags)
606 {
607         int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
608         struct hw_perf_event *hwc = &event->hw;
609
610         if (ret)
611                 return ret;
612
613         /*
614          * The first four DF counters are accessible via RDPMC index 6 to 9
615          * followed by the L3 counters from index 10 to 15. For processors
616          * with more than four DF counters, the DF RDPMC assignments become
617          * discontiguous as the additional counters are accessible starting
618          * from index 16.
619          */
620         if (hwc->idx >= NUM_COUNTERS_NB)
621                 hwc->event_base_rdpmc += NUM_COUNTERS_L3;
622
623         /* Delayed start after rdpmc base update */
624         if (flags & PERF_EF_START)
625                 amd_uncore_start(event, PERF_EF_RELOAD);
626
627         return 0;
628 }
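
/*
 * Example of the remapping above on a PerfMonV2 system with more than four
 * DF counters: hwc->idx 0..3 map to RDPMC indices 6..9 as usual, while
 * hwc->idx 4 maps to 6 + 4 + NUM_COUNTERS_L3 = 16, idx 5 to 17, and so on,
 * skipping over the L3 RDPMC range 10..15.
 */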
629
630 static
631 void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
632 {
633         union cpuid_0x80000022_ebx ebx;
634         union amd_uncore_info info;
635
636         if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
637                 return;
638
639         info.split.aux_data = 0;
640         info.split.num_pmcs = NUM_COUNTERS_NB;
641         info.split.gid = 0;
642         info.split.cid = topology_die_id(cpu);
643
644         if (pmu_version >= 2) {
645                 ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
646                 info.split.num_pmcs = ebx.split.num_df_pmc;
647         }
648
649         *per_cpu_ptr(uncore->info, cpu) = info;
650 }
651
652 static
653 int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
654 {
655         struct attribute **df_attr = amd_uncore_df_format_attr;
656         struct amd_uncore_pmu *pmu;
657
658         /* Run just once */
659         if (uncore->init_done)
660                 return amd_uncore_ctx_init(uncore, cpu);
661
662         /* No grouping, single instance for a system */
663         uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
664         if (!uncore->pmus) {
665                 uncore->num_pmus = 0;
666                 goto done;
667         }
668
669         /*
670          * For Family 17h and above, the Northbridge counters are repurposed
671          * as Data Fabric counters. The PMUs are exported based on family as
672          * either NB or DF.
673          */
674         pmu = &uncore->pmus[0];
675         strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
676                 sizeof(pmu->name));
677         pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
678         pmu->msr_base = MSR_F15H_NB_PERF_CTL;
679         pmu->rdpmc_base = RDPMC_BASE_NB;
680         pmu->group = amd_uncore_ctx_gid(uncore, cpu);
681
682         if (pmu_version >= 2) {
683                 *df_attr++ = &format_attr_event14v2.attr;
684                 *df_attr++ = &format_attr_umask12.attr;
685         } else if (boot_cpu_data.x86 >= 0x17) {
686                 *df_attr = &format_attr_event14.attr;
687         }
688
689         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
690         if (!pmu->ctx)
691                 goto done;
692
693         pmu->pmu = (struct pmu) {
694                 .task_ctx_nr    = perf_invalid_context,
695                 .attr_groups    = amd_uncore_df_attr_groups,
696                 .name           = pmu->name,
697                 .event_init     = amd_uncore_df_event_init,
698                 .add            = amd_uncore_df_add,
699                 .del            = amd_uncore_del,
700                 .start          = amd_uncore_start,
701                 .stop           = amd_uncore_stop,
702                 .read           = amd_uncore_read,
703                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
704                 .module         = THIS_MODULE,
705         };
706
707         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
708                 free_percpu(pmu->ctx);
709                 pmu->ctx = NULL;
710                 goto done;
711         }
712
713         pr_info("%d %s%s counters detected\n", pmu->num_counters,
714                 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
715                 pmu->pmu.name);
716
717         uncore->num_pmus = 1;
718
719 done:
720         uncore->init_done = true;
721
722         return amd_uncore_ctx_init(uncore, cpu);
723 }
724
725 static int amd_uncore_l3_event_init(struct perf_event *event)
726 {
727         int ret = amd_uncore_event_init(event);
728         struct hw_perf_event *hwc = &event->hw;
729         u64 config = event->attr.config;
730         u64 mask;
731
732         hwc->config = config & AMD64_RAW_EVENT_MASK_NB;
733
734         /*
735          * SliceMask and ThreadMask need to be set for certain L3 events.
736          * For other events, the two fields do not affect the count.
737          */
738         if (ret || boot_cpu_data.x86 < 0x17)
739                 return ret;
740
741         mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
742                          AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
743                          AMD64_L3_COREID_MASK);
744
745         if (boot_cpu_data.x86 <= 0x18)
746                 mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
747                        ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
748
749         /*
750          * If the user doesn't specify a ThreadMask, they're not trying to
751          * count core 0, so we enable all cores & threads.
752          * We'll also assume that they want to count slice 0 if they specify
753          * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
754          */
755         else if (!(config & AMD64_L3_F19H_THREAD_MASK))
756                 mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
757                        AMD64_L3_EN_ALL_CORES;
758
759         hwc->config |= mask;
760
761         return 0;
762 }
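
/*
 * Net effect of the defaulting above: on Family 17h/18h, a zero SliceMask
 * or ThreadMask in the raw config selects all slices and/or all threads;
 * on Family 19h and later, omitting ThreadMask turns on EnAllSlices and
 * EnAllCores together with a full ThreadMask, while a user-supplied
 * ThreadMask (and any SliceId/CoreId bits) is kept as given.
 */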
763
764 static
765 void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
766 {
767         union amd_uncore_info info;
768
769         if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
770                 return;
771
772         info.split.aux_data = 0;
773         info.split.num_pmcs = NUM_COUNTERS_L2;
774         info.split.gid = 0;
775         info.split.cid = per_cpu_llc_id(cpu);
776
777         if (boot_cpu_data.x86 >= 0x17)
778                 info.split.num_pmcs = NUM_COUNTERS_L3;
779
780         *per_cpu_ptr(uncore->info, cpu) = info;
781 }
782
783 static
784 int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
785 {
786         struct attribute **l3_attr = amd_uncore_l3_format_attr;
787         struct amd_uncore_pmu *pmu;
788
789         /* Run just once */
790         if (uncore->init_done)
791                 return amd_uncore_ctx_init(uncore, cpu);
792
793         /* No grouping, single instance for a system */
794         uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
795         if (!uncore->pmus) {
796                 uncore->num_pmus = 0;
797                 goto done;
798         }
799
800         /*
801          * For Family 17h and above, L3 cache counters are available instead
802          * of L2 cache counters. The PMUs are exported based on family as
803          * either L2 or L3.
804          */
805         pmu = &uncore->pmus[0];
806         strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
807                 sizeof(pmu->name));
808         pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
809         pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
810         pmu->rdpmc_base = RDPMC_BASE_LLC;
811         pmu->group = amd_uncore_ctx_gid(uncore, cpu);
812
813         if (boot_cpu_data.x86 >= 0x17) {
814                 *l3_attr++ = &format_attr_event8.attr;
815                 *l3_attr++ = &format_attr_umask8.attr;
816                 *l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
817                              &format_attr_threadmask2.attr :
818                              &format_attr_threadmask8.attr;
819         }
820
821         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
822         if (!pmu->ctx)
823                 goto done;
824
825         pmu->pmu = (struct pmu) {
826                 .task_ctx_nr    = perf_invalid_context,
827                 .attr_groups    = amd_uncore_l3_attr_groups,
828                 .attr_update    = amd_uncore_l3_attr_update,
829                 .name           = pmu->name,
830                 .event_init     = amd_uncore_l3_event_init,
831                 .add            = amd_uncore_add,
832                 .del            = amd_uncore_del,
833                 .start          = amd_uncore_start,
834                 .stop           = amd_uncore_stop,
835                 .read           = amd_uncore_read,
836                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
837                 .module         = THIS_MODULE,
838         };
839
840         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
841                 free_percpu(pmu->ctx);
842                 pmu->ctx = NULL;
843                 goto done;
844         }
845
846         pr_info("%d %s%s counters detected\n", pmu->num_counters,
847                 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
848                 pmu->pmu.name);
849
850         uncore->num_pmus = 1;
851
852 done:
853         uncore->init_done = true;
854
855         return amd_uncore_ctx_init(uncore, cpu);
856 }
857
858 static int amd_uncore_umc_event_init(struct perf_event *event)
859 {
860         struct hw_perf_event *hwc = &event->hw;
861         int ret = amd_uncore_event_init(event);
862
863         if (ret)
864                 return ret;
865
866         hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;
867
868         return 0;
869 }
870
871 static void amd_uncore_umc_start(struct perf_event *event, int flags)
872 {
873         struct hw_perf_event *hwc = &event->hw;
874
875         if (flags & PERF_EF_RELOAD)
876                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
877
878         hwc->state = 0;
879         wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
880         perf_event_update_userpage(event);
881 }
882
883 static
884 void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
885 {
886         union cpuid_0x80000022_ebx ebx;
887         union amd_uncore_info info;
888         unsigned int eax, ecx, edx;
889
890         if (pmu_version < 2)
891                 return;
892
893         cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
894         info.split.aux_data = ecx;      /* stash active mask */
895         info.split.num_pmcs = ebx.split.num_umc_pmc;
896         info.split.gid = topology_die_id(cpu);
897         info.split.cid = topology_die_id(cpu);
898         *per_cpu_ptr(uncore->info, cpu) = info;
899 }
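
/*
 * For the UMC PMUs, aux_data stashes the active UMC bitmask reported in
 * ECX of CPUID leaf 0x80000022, and both the group id and the context id
 * are keyed on the die, so one set of UMC contexts exists per die.
 */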
900
901 static
902 int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
903 {
904         DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
905         u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
906         u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
907         union amd_uncore_info info;
908         struct amd_uncore_pmu *pmu;
909         int index = 0, gid, i;
910
911         if (pmu_version < 2)
912                 return 0;
913
914         /* Run just once */
915         if (uncore->init_done)
916                 return amd_uncore_ctx_init(uncore, cpu);
917
918         /* Find unique groups */
919         for_each_online_cpu(i) {
920                 info = *per_cpu_ptr(uncore->info, i);
921                 gid = info.split.gid;
922                 if (test_bit(gid, gmask))
923                         continue;
924
925                 __set_bit(gid, gmask);
926                 group_num_pmus[gid] = hweight32(info.split.aux_data);
927                 group_num_pmcs[gid] = info.split.num_pmcs;
928                 uncore->num_pmus += group_num_pmus[gid];
929         }
930
931         uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
932                                GFP_KERNEL);
933         if (!uncore->pmus) {
934                 uncore->num_pmus = 0;
935                 goto done;
936         }
937
938         for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
939                 for (i = 0; i < group_num_pmus[gid]; i++) {
940                         pmu = &uncore->pmus[index];
941                         snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
942                         pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
943                         pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
944                         pmu->rdpmc_base = -1;
945                         pmu->group = gid;
946
947                         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
948                         if (!pmu->ctx)
949                                 goto done;
950
951                         pmu->pmu = (struct pmu) {
952                                 .task_ctx_nr    = perf_invalid_context,
953                                 .attr_groups    = amd_uncore_umc_attr_groups,
954                                 .name           = pmu->name,
955                                 .event_init     = amd_uncore_umc_event_init,
956                                 .add            = amd_uncore_add,
957                                 .del            = amd_uncore_del,
958                                 .start          = amd_uncore_umc_start,
959                                 .stop           = amd_uncore_stop,
960                                 .read           = amd_uncore_read,
961                                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
962                                 .module         = THIS_MODULE,
963                         };
964
965                         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
966                                 free_percpu(pmu->ctx);
967                                 pmu->ctx = NULL;
968                                 goto done;
969                         }
970
971                         pr_info("%d %s counters detected\n", pmu->num_counters,
972                                 pmu->pmu.name);
973
974                         index++;
975                 }
976         }
977
978 done:
979         uncore->num_pmus = index;
980         uncore->init_done = true;
981
982         return amd_uncore_ctx_init(uncore, cpu);
983 }
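
/*
 * The setup above creates one "amd_umc_%d" PMU per active memory
 * controller in each group (die), splitting the group's counters evenly
 * across them: hweight32() of the stashed active mask gives the UMC count,
 * and each PMU's MSR base is offset by two registers per counter.
 */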
984
985 static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
986         /* UNCORE_TYPE_DF */
987         {
988                 .scan = amd_uncore_df_ctx_scan,
989                 .init = amd_uncore_df_ctx_init,
990                 .move = amd_uncore_ctx_move,
991                 .free = amd_uncore_ctx_free,
992         },
993         /* UNCORE_TYPE_L3 */
994         {
995                 .scan = amd_uncore_l3_ctx_scan,
996                 .init = amd_uncore_l3_ctx_init,
997                 .move = amd_uncore_ctx_move,
998                 .free = amd_uncore_ctx_free,
999         },
1000         /* UNCORE_TYPE_UMC */
1001         {
1002                 .scan = amd_uncore_umc_ctx_scan,
1003                 .init = amd_uncore_umc_ctx_init,
1004                 .move = amd_uncore_ctx_move,
1005                 .free = amd_uncore_ctx_free,
1006         },
1007 };
1008
1009 static int __init amd_uncore_init(void)
1010 {
1011         struct amd_uncore *uncore;
1012         int ret = -ENODEV;
1013         int i;
1014
1015         if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
1016             boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
1017                 return -ENODEV;
1018
1019         if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
1020                 return -ENODEV;
1021
1022         if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
1023                 pmu_version = 2;
1024
1025         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1026                 uncore = &uncores[i];
1027
1028                 BUG_ON(!uncore->scan);
1029                 BUG_ON(!uncore->init);
1030                 BUG_ON(!uncore->move);
1031                 BUG_ON(!uncore->free);
1032
1033                 uncore->info = alloc_percpu(union amd_uncore_info);
1034                 if (!uncore->info) {
1035                         ret = -ENOMEM;
1036                         goto fail;
1037                 }
1038         }
1039
1040         /*
1041          * Install callbacks. Core will call them for each online cpu.
1042          */
1043         ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
1044                                 "perf/x86/amd/uncore:prepare",
1045                                 NULL, amd_uncore_cpu_dead);
1046         if (ret)
1047                 goto fail;
1048
1049         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
1050                                 "perf/x86/amd/uncore:starting",
1051                                 amd_uncore_cpu_starting, NULL);
1052         if (ret)
1053                 goto fail_prep;
1054
1055         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
1056                                 "perf/x86/amd/uncore:online",
1057                                 amd_uncore_cpu_online,
1058                                 amd_uncore_cpu_down_prepare);
1059         if (ret)
1060                 goto fail_start;
1061
1062         return 0;
1063
1064 fail_start:
1065         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1066 fail_prep:
1067         cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1068 fail:
1069         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1070                 uncore = &uncores[i];
1071                 if (uncore->info) {
1072                         free_percpu(uncore->info);
1073                         uncore->info = NULL;
1074                 }
1075         }
1076
1077         return ret;
1078 }
1079
1080 static void __exit amd_uncore_exit(void)
1081 {
1082         struct amd_uncore *uncore;
1083         struct amd_uncore_pmu *pmu;
1084         int i, j;
1085
1086         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
1087         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1088         cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1089
1090         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1091                 uncore = &uncores[i];
1092                 if (!uncore->info)
1093                         continue;
1094
1095                 free_percpu(uncore->info);
1096                 uncore->info = NULL;
1097
1098                 for (j = 0; j < uncore->num_pmus; j++) {
1099                         pmu = &uncore->pmus[j];
1100                         if (!pmu->ctx)
1101                                 continue;
1102
1103                         perf_pmu_unregister(&pmu->pmu);
1104                         free_percpu(pmu->ctx);
1105                         pmu->ctx = NULL;
1106                 }
1107
1108                 kfree(uncore->pmus);
1109                 uncore->pmus = NULL;
1110         }
1111 }
1112
1113 module_init(amd_uncore_init);
1114 module_exit(amd_uncore_exit);
1115
1116 MODULE_DESCRIPTION("AMD Uncore Driver");
1117 MODULE_LICENSE("GPL v2");