// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;

struct amd_uncore_ctx {
	int refcnt;
	int cpu;
	struct perf_event **events;
	struct hlist_node node;
};

struct amd_uncore_pmu {
	char name[UNCORE_NAME_LEN];
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	int group;
	cpumask_t active_mask;
	struct pmu pmu;
	struct amd_uncore_ctx * __percpu *ctx;
};

enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};

union amd_uncore_info {
	struct {
		u64 aux_data:32;	/* auxiliary data */
		u64 num_pmcs:8;		/* number of counters */
		u64 gid:8;		/* group id */
		u64 cid:8;		/* context id */
	} split;
	u64 full;
};

struct amd_uncore {
	union amd_uncore_info * __percpu info;
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	bool init_done;

	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	int (*init)(struct amd_uncore *uncore, unsigned int cpu);
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new = 0;
	s64 delta;

	/*
	 * Since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us.
	 */
	prev = local64_read(&hwc->prev_count);

	/*
	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
	 * read counts directly from the corresponding PERF_CTR.
	 */
	if (hwc->event_base_rdpmc < 0)
		rdmsrl(hwc->event_base, new);
	else
		rdpmcl(hwc->event_base_rdpmc, new);

	local64_set(&hwc->prev_count, new);
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

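/*
 * The shift pair above sign-extends the deltas of the 48-bit wide
 * hardware counters (COUNTER_SHIFT == 64 - 48) so that a counter
 * wrap-around still produces a positive delta. A worked example,
 * with made-up values:
 *
 *	prev  = 0xffffffffffff  (48-bit counter about to wrap)
 *	new   = 0x000000000004  (counter wrapped, then ticked on)
 *
 *	(new << 16) - (prev << 16) = 0x50000  (as a signed 64-bit value)
 *	delta = 0x50000 >> 16 = 5
 */
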
static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		event->pmu->read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < pmu->num_counters; i++) {
		if (ctx->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < pmu->num_counters; i++) {
		if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (pmu->rdpmc_base < 0)
		hwc->event_base_rdpmc = -1;

	if (flags & PERF_EF_START)
		event->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}

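/*
 * The per-counter MSR addressing above relies on the PERF_CTL and
 * PERF_CTR registers of each uncore counter being laid out as adjacent
 * pairs: counter N is programmed via msr_base + 2 * N and read back
 * via msr_base + 2 * N + 1. For example, with the NB/DF base
 * MSR_F15H_NB_PERF_CTL, counter 1 uses msr_base + 2 for its control
 * register and msr_base + 3 for its count.
 */
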
static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	event->pmu->stop(event, PERF_EF_UPDATE);

	for (i = 0; i < pmu->num_counters; i++) {
		if (cmpxchg(&ctx->events[i], event, NULL) == event)
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->cpu < 0)
		return -EINVAL;

	pmu = event_to_amd_uncore_pmu(event);
	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	if (!ctx)
		return -ENODEV;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache. On family 16h and below,
	 * interrupts can be directed to a single target core, however, event
	 * counts generated by processes running on other cores cannot be masked
	 * out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts.
	 */
	hwc->config = event->attr.config;
	hwc->idx = -1;

	/*
	 * Since requests can come in on any of the shared cores, we remap
	 * them all to a single common cpu.
	 */
	event->cpu = ctx->cpu;

	return 0;
}

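/*
 * The remap above is user-visible: opening an uncore event on any CPU
 * of a die lands on one common owner CPU. An illustrative session
 * (the PMU name and event encoding are examples only):
 *
 *	perf stat -e amd_df/event=0x1f/ -C 5 -- sleep 1
 *
 * may actually count on CPU 0 if CPUs 0-5 share the same Data Fabric.
 * The owner CPUs are advertised via each PMU's "cpumask" attribute in
 * sysfs.
 */
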
static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pmu *ptr = dev_get_drvdata(dev);
	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				      struct device_attribute *attr,	\
				      char *page)			\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60");	/* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");		/* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");			/* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");		/* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");		/* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");		/* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");		/* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2,	threadmask,	"config:56-57");		/* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");			/* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");			/* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");		/* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,	rdwrmask,	"config:8-9");			/* PerfMonV2 UMC */

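/*
 * Each DEFINE_UNCORE_FORMAT_ATTR() instance above becomes a file under
 * /sys/bus/event_source/devices/<pmu>/format/ which tells perf how to
 * pack the named field into perf_event_attr::config. For example, on a
 * family 17h or later part:
 *
 *	$ cat /sys/bus/event_source/devices/amd_l3/format/event
 *	config:0-7
 */
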
/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,	/* event */
	&format_attr_rdwrmask.attr,	/* rdwrmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};

static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.num_pmcs;
}

static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	int i;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		ctx = *per_cpu_ptr(pmu->ctx, cpu);
		if (!ctx)
			continue;

		if (cpu == ctx->cpu)
			cpumask_clear_cpu(cpu, &pmu->active_mask);

		if (!--ctx->refcnt) {
			kfree(ctx->events);
			kfree(ctx);
		}

		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
	}
}

static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *prev;
	struct amd_uncore_pmu *pmu;
	int node, cid, gid, i, j;

	if (!uncore->init_done || !uncore->num_pmus)
		return 0;

	cid = amd_uncore_ctx_cid(uncore, cpu);
	gid = amd_uncore_ctx_gid(uncore, cpu);

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
		curr = NULL;

		/* Check for group exclusivity */
		if (gid != pmu->group)
			continue;

		/* Find a sibling context */
		for_each_online_cpu(j) {
			if (cpu == j)
				continue;

			prev = *per_cpu_ptr(pmu->ctx, j);
			if (!prev)
				continue;

			if (cid == amd_uncore_ctx_cid(uncore, j)) {
				curr = prev;
				break;
			}
		}

		/* Allocate context if sibling does not exist */
		if (!curr) {
			node = cpu_to_node(cpu);
			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
			if (!curr)
				goto fail;

			curr->cpu = cpu;
			curr->events = kzalloc_node(sizeof(*curr->events) *
						    pmu->num_counters,
						    GFP_KERNEL, node);
			if (!curr->events) {
				kfree(curr);
				goto fail;
			}

			cpumask_set_cpu(cpu, &pmu->active_mask);
		}

		curr->refcnt++;
		*per_cpu_ptr(pmu->ctx, cpu) = curr;
	}

	return 0;

fail:
	amd_uncore_ctx_free(uncore, cpu);

	return -ENOMEM;
}

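/*
 * Sketch of the sharing this sets up, assuming a part where CPUs 0-3
 * and CPUs 4-7 each share an L3 (i.e. report the same cid): the first
 * CPU of each set to come online allocates a context and becomes its
 * owner; later siblings only bump the refcnt:
 *
 *	per-cpu ctx of CPUs 0-3 -> ctxA (refcnt 4, ctxA->cpu == 0)
 *	per-cpu ctx of CPUs 4-7 -> ctxB (refcnt 4, ctxB->cpu == 4)
 */
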
static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *next;
	struct amd_uncore_pmu *pmu;
	int i, j;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		curr = *per_cpu_ptr(pmu->ctx, cpu);
		if (!curr)
			continue;

		/* Migrate to a shared sibling if possible */
		for_each_online_cpu(j) {
			next = *per_cpu_ptr(pmu->ctx, j);
			if (!next || cpu == j)
				continue;

			perf_pmu_migrate_context(&pmu->pmu, cpu, j);
			cpumask_clear_cpu(cpu, &pmu->active_mask);
			cpumask_set_cpu(j, &pmu->active_mask);
			next->cpu = j;
			break;
		}
	}
}

static int amd_uncore_cpu_starting(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->scan(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->init(uncore, cpu))
			break;
	}

	return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->move(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->free(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret || pmu_version < 2)
		return ret;

	hwc->config = event->attr.config &
		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
					  AMD64_RAW_EVENT_MASK_NB);

	return 0;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
	struct hw_perf_event *hwc = &event->hw;

	if (ret)
		return ret;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;

	/* Delayed start after rdpmc base update */
	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

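/*
 * Worked example of the fixup above, assuming a part with eight DF
 * counters (RDPMC_BASE_NB == 6, NUM_COUNTERS_L3 == 6):
 *
 *	hwc->idx:          0  1  2  3   4   5   6   7
 *	base + idx:        6  7  8  9  10  11  12  13
 *	after the fixup:   6  7  8  9  16  17  18  19
 *
 * which keeps indices 10-15 reserved for the L3 counters.
 */
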
static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_NB;
	info.split.gid = 0;
	info.split.cid = topology_die_id(cpu);

	if (pmu_version >= 2) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
		info.split.num_pmcs = ebx.split.num_df_pmc;
	}

	*per_cpu_ptr(uncore->info, cpu) = info;
}

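/*
 * EXT_PERFMON_DEBUG_FEATURES is CPUID leaf 0x80000022, whose EBX
 * enumerates, among others, the number of Data Fabric and UMC
 * counters. On PerfMonV2 parts, num_pmcs can therefore exceed the
 * legacy NUM_COUNTERS_NB value of 4.
 */
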
static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
	int ret = amd_uncore_event_init(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 config = event->attr.config;
	u64 mask;

	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

	/*
	 * SliceMask and ThreadMask need to be set for certain L3 events.
	 * For other events, the two fields do not affect the count.
	 */
	if (ret || boot_cpu_data.x86 < 0x17)
		return ret;

	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
			 AMD64_L3_COREID_MASK);

	if (boot_cpu_data.x86 <= 0x18)
		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

	/*
	 * If the user doesn't specify a ThreadMask, they're not trying to
	 * count core 0, so we enable all cores & threads.
	 * We'll also assume that they want to count slice 0 if they specify
	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
	 */
	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
		       AMD64_L3_EN_ALL_CORES;

	hwc->config |= mask;

	return 0;
}

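/*
 * The defaulting above means that, on a family 19h part, an event
 * opened without a threadmask counts the whole L3 (EnAllSlices,
 * EnAllCores and the full ThreadMask are set), while one opened as,
 * say, amd_l3/event=0x04,threadmask=1/ counts only thread 0 of core 0
 * on slice 0 (the event encoding here is illustrative).
 */
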
static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_L2;
	info.split.gid = 0;
	info.split.cid = per_cpu_llc_id(cpu);

	if (boot_cpu_data.x86 >= 0x17)
		info.split.num_pmcs = NUM_COUNTERS_L3;

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}

static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	if (pmu_version < 2)
		return;

	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	info.split.gid = topology_die_id(cpu);
	info.split.cid = topology_die_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}

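/*
 * ECX of the same leaf is a bitmap of the UMCs that are active on this
 * node, which is why it is stashed in aux_data: the context init code
 * derives the number of UMC PMUs per group from its hweight. E.g. an
 * aux_data of 0x3f (illustrative) would describe six active memory
 * controllers.
 */
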
static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
	union amd_uncore_info info;
	struct amd_uncore_pmu *pmu;
	int index = 0, gid, i;

	if (pmu_version < 2)
		return 0;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* Find unique groups */
	for_each_online_cpu(i) {
		info = *per_cpu_ptr(uncore->info, i);
		gid = info.split.gid;
		if (test_bit(gid, gmask))
			continue;

		__set_bit(gid, gmask);
		group_num_pmus[gid] = hweight32(info.split.aux_data);
		group_num_pmcs[gid] = info.split.num_pmcs;
		uncore->num_pmus += group_num_pmus[gid];
	}

	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
			       GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
		for (i = 0; i < group_num_pmus[gid]; i++) {
			pmu = &uncore->pmus[index];
			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
			pmu->rdpmc_base = -1;
			pmu->group = gid;

			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
			if (!pmu->ctx)
				goto done;

			pmu->pmu = (struct pmu) {
				.task_ctx_nr	= perf_invalid_context,
				.attr_groups	= amd_uncore_umc_attr_groups,
				.name		= pmu->name,
				.event_init	= amd_uncore_umc_event_init,
				.add		= amd_uncore_add,
				.del		= amd_uncore_del,
				.start		= amd_uncore_umc_start,
				.stop		= amd_uncore_stop,
				.read		= amd_uncore_read,
				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
				.module		= THIS_MODULE,
			};

			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
				free_percpu(pmu->ctx);
				pmu->ctx = NULL;
				goto done;
			}

			pr_info("%d %s counters detected\n", pmu->num_counters,
				pmu->pmu.name);

			index++;
		}
	}

done:
	uncore->num_pmus = index;
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

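/*
 * Sketch of the resulting layout, assuming a single group (die) with
 * two active UMCs and eight UMC PMCs reported in total: amd_umc_0 gets
 * four counters starting at MSR_F19H_UMC_PERF_CTL and amd_umc_1
 * another four starting at MSR_F19H_UMC_PERF_CTL + 8, the stride
 * accounting for interleaved CTL/CTR pairs (hence the "* 2").
 */
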
static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};

static int __init amd_uncore_init(void)
{
	struct amd_uncore *uncore;
	int ret = -ENODEV;
	int i;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];

		BUG_ON(!uncore->scan);
		BUG_ON(!uncore->init);
		BUG_ON(!uncore->move);
		BUG_ON(!uncore->free);

		uncore->info = alloc_percpu(union amd_uncore_info);
		if (!uncore->info) {
			ret = -ENOMEM;
			goto fail;
		}
	}

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				NULL, amd_uncore_cpu_dead);
	if (ret)
		goto fail;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->info) {
			free_percpu(uncore->info);
			uncore->info = NULL;
		}
	}

	return ret;
}

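/*
 * The three hotplug states fire in a fixed order for each CPU:
 * "prepare" (no bringup work here), then "starting" on the incoming
 * CPU itself (scan), then "online" in process context (init). On
 * removal the order reverses: the online teardown (move) runs first
 * and the prepare teardown (free) last, which is why the callbacks are
 * paired with the states the way they are above.
 */
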
static void __exit amd_uncore_exit(void)
{
	struct amd_uncore *uncore;
	struct amd_uncore_pmu *pmu;
	int i, j;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (!uncore->info)
			continue;

		free_percpu(uncore->info);
		uncore->info = NULL;

		for (j = 0; j < uncore->num_pmus; j++) {
			pmu = &uncore->pmus[j];
			if (!pmu->ctx)
				continue;

			perf_pmu_unregister(&pmu->pmu);
			free_percpu(pmu->ctx);
			pmu->ctx = NULL;
		}

		kfree(uncore->pmus);
		uncore->pmus = NULL;
	}
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");