// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support Intel IOMMU PerfMon
 * Copyright(c) 2023 Intel Corporation.
 */
#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/dmar.h>
#include "iommu.h"
#include "perfmon.h"

PMU_FORMAT_ATTR(event,		"config:0-27");		/* ES: Events Select */
PMU_FORMAT_ATTR(event_group,	"config:28-31");	/* EGI: Event Group Index */

static struct attribute *iommu_pmu_format_attrs[] = {
	&format_attr_event_group.attr,
	&format_attr_event.attr,
	NULL
};

static struct attribute_group iommu_pmu_format_attr_group = {
	.name = "format",
	.attrs = iommu_pmu_format_attrs,
};

/* The available events are added in attr_update later */
static struct attribute *attrs_empty[] = {
	NULL
};

static struct attribute_group iommu_pmu_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

static cpumask_t iommu_pmu_cpu_mask;

static ssize_t
cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *iommu_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

static struct attribute_group iommu_pmu_cpumask_attr_group = {
	.attrs = iommu_pmu_cpumask_attrs,
};

static const struct attribute_group *iommu_pmu_attr_groups[] = {
	&iommu_pmu_format_attr_group,
	&iommu_pmu_events_attr_group,
	&iommu_pmu_cpumask_attr_group,
	NULL
};

static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
{
	/*
	 * The perf_event subsystem creates its own dev for each PMU.
	 * See pmu_dev_alloc().
	 */
	return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
}

#define IOMMU_PMU_ATTR(_name, _format, _filter)				\
	PMU_FORMAT_ATTR(_name, _format);				\
									\
static struct attribute *_name##_attr[] = {				\
	&format_attr_##_name.attr,					\
	NULL								\
};									\
									\
static umode_t								\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);		\
									\
	if (!iommu_pmu)							\
		return 0;						\
	return (iommu_pmu->filter & _filter) ? attr->mode : 0;		\
}									\
									\
static struct attribute_group _name = {					\
	.name = "format",						\
	.attrs = _name##_attr,						\
	.is_visible = _name##_is_visible,				\
};

IOMMU_PMU_ATTR(filter_requester_id_en,	"config1:0",		IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en,	"config1:1",		IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en,		"config1:2",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en,		"config1:3",		IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en,	"config1:4",		IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id,	"config1:16-31",	IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain,		"config1:32-47",	IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid,		"config2:0-21",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats,		"config2:24-28",	IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table,	"config2:32-36",	IOMMU_PMU_FILTER_PAGE_TABLE);

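/*
 * Illustrative example (not part of the original file): the format
 * attributes above are exported through sysfs, so the perf tool can set
 * a filter by name, roughly along the lines of
 *
 *   perf stat -e dmar0/iommu_requests,filter_requester_id_en=1,filter_requester_id=0x100/
 *
 * where the enable bit lands in config1:0 and the requester ID value in
 * config1:16-31 as defined above. The PMU name (dmar0, dmar1, ...)
 * depends on the IOMMU unit.
 */
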
#define iommu_pmu_en_requester_id(e)		((e) & 0x1)
#define iommu_pmu_en_domain(e)			(((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)			(((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)			(((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)		(((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)	(((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)		(((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)		((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)		(((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)	(((filter) >> 32) & 0x1f)

#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)		\
{										\
	if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) {	\
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +	\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\
	}									\
}

#define iommu_pmu_clear_filter(_filter, _idx)					\
{										\
	if (iommu_pmu->filter & _filter) {					\
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +	\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    0);							\
	}									\
}

/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)			\
	PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)		\
										\
static struct attribute *_name##_attr[] = {					\
	&event_attr_##_name.attr.attr,						\
	NULL									\
};										\
										\
static umode_t									\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)		\
{										\
	struct device *dev = kobj_to_dev(kobj);					\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);			\
										\
	if (!iommu_pmu)								\
		return 0;							\
	return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;		\
}										\
										\
static struct attribute_group _name = {					\
	.name = "events",							\
	.attrs = _name##_attr,							\
	.is_visible = _name##_is_visible,					\
};

IOMMU_PMU_EVENT_ATTR(iommu_clocks,		"event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests,		"event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy,		"event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked,		"event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds,		"event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked,		"event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted,		"event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup,		"event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit,		"event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup,	"event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit,		"event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup,		"event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit,		"event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup,		"event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit,		"event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup,	"event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit,		"event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup,		"event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit,			"event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup,		"event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit,		"event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup,		"event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted,	"event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted,	"event_group=0x4,event=0x004", 0x4, 0x004)

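/*
 * Illustrative note: each event string above is parsed by the perf tool
 * against the "event" (config:0-27) and "event_group" (config:28-31)
 * format attributes defined at the top of this file, so e.g.
 * ctxt_cache_hit ("event_group=0x2,event=0x002") resolves to
 * attr.config = (0x2 << 28) | 0x002.
 */
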
static const struct attribute_group *iommu_pmu_attr_update[] = {
	&filter_requester_id_en,
	&filter_domain_en,
	&filter_pasid_en,
	&filter_ats_en,
	&filter_page_table_en,
	&filter_requester_id,
	&filter_domain,
	&filter_pasid,
	&filter_ats,
	&filter_page_table,
	&iommu_clocks,
	&iommu_requests,
	&pw_occupancy,
	&ats_blocked,
	&iommu_mrds,
	&iommu_mem_blocked,
	&pg_req_posted,
	&ctxt_cache_lookup,
	&ctxt_cache_hit,
	&pasid_cache_lookup,
	&pasid_cache_hit,
	&ss_nonleaf_lookup,
	&ss_nonleaf_hit,
	&fs_nonleaf_lookup,
	&fs_nonleaf_hit,
	&hpt_nonleaf_lookup,
	&hpt_nonleaf_hit,
	&iotlb_lookup,
	&iotlb_hit,
	&hpt_leaf_lookup,
	&hpt_leaf_hit,
	&int_cache_lookup,
	&int_cache_hit_nonposted,
	&int_cache_hit_posted,
	NULL
};

static inline void __iomem *
iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
}

static inline void __iomem *
iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
}

static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct iommu_pmu, pmu);
}

static inline u64 iommu_event_config(struct perf_event *event)
{
	u64 config = event->attr.config;

	return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
	       (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
	       IOMMU_EVENT_CFG_INT;
}

static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
				      struct perf_event *event)
{
	return event->pmu == &iommu_pmu->pmu;
}

static int iommu_pmu_validate_event(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	u32 event_group = iommu_event_group(event->attr.config);

	if (event_group >= iommu_pmu->num_eg)
		return -EINVAL;

	return 0;
}

static int iommu_pmu_validate_group(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct perf_event *sibling;
	int nr = 0;

	/*
	 * All events in a group must be scheduled simultaneously.
	 * Check whether there are enough counters for all the events.
	 */
	for_each_sibling_event(sibling, event->group_leader) {
		if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
		    sibling->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (++nr > iommu_pmu->num_cntr)
			return -EINVAL;
	}

	return 0;
}

static int iommu_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* sampling not supported */
	if (event->attr.sample_period)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (iommu_pmu_validate_event(event))
		return -EINVAL;

	hwc->config = iommu_event_config(event);

	return iommu_pmu_validate_group(event);
}

static void iommu_pmu_event_update(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_count, new_count, delta;
	int shift = 64 - iommu_pmu->cntr_width;

again:
	prev_count = local64_read(&hwc->prev_count);
	new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
		goto again;

	/*
	 * The counter width is enumerated. Always shift the counter
	 * before using it.
	 */
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

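/*
 * Worked example for the update above (illustrative): with a 48-bit
 * counter, shift = 16. Shifting both snapshots left by 16 lets the
 * subtraction wrap naturally at the hardware counter width, and
 * shifting the difference back right by 16 yields the unsigned delta
 * even if the counter rolled over between the two reads.
 */
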
static void iommu_pmu_start(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;
	u64 count;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/* Always reprogram the period */
	count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	local64_set((&hwc->prev_count), count);

	/*
	 * An error from the ecmd is ignored here:
	 * - The existing perf_event subsystem doesn't handle the error.
	 *   Only the IOMMU PMU returns a runtime HW error. We don't want
	 *   to change the existing generic interfaces for this specific
	 *   case.
	 * - It's a corner case caused by HW, which is very unlikely to
	 *   happen. There is nothing SW can do.
	 * - The worst case is that the user will get <not counted> with
	 *   the perf command, which can give the user some hints.
	 */
	ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);

	perf_event_update_userpage(event);
}

static void iommu_pmu_stop(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);

		iommu_pmu_event_update(event);

		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static inline int
iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
				  int idx, struct perf_event *event)
{
	u32 event_group = iommu_event_group(event->attr.config);
	u32 select = iommu_event_select(event->attr.config);

	if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
		return -EINVAL;

	return 0;
}

static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
				  struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	/*
	 * The counters which support limited events are usually at the end.
	 * Schedule them first to accommodate more events.
	 */
	for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
		if (test_and_set_bit(idx, iommu_pmu->used_mask))
			continue;
		/* Check per-counter event capabilities */
		if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
			break;
		clear_bit(idx, iommu_pmu->used_mask);
	}
	if (idx < 0)
		return -EINVAL;

	iommu_pmu->event_list[idx] = event;
	hwc->idx = idx;

	/* config events */
	dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);

	iommu_pmu_set_filter(requester_id, event->attr.config1,
			     IOMMU_PMU_FILTER_REQUESTER_ID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(domain, event->attr.config1,
			     IOMMU_PMU_FILTER_DOMAIN, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(pasid, event->attr.config2,
			     IOMMU_PMU_FILTER_PASID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(ats, event->attr.config2,
			     IOMMU_PMU_FILTER_ATS, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(page_table, event->attr.config2,
			     IOMMU_PMU_FILTER_PAGE_TABLE, idx,
			     event->attr.config1);

	return 0;
}

static int iommu_pmu_add(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	ret = iommu_pmu_assign_event(iommu_pmu, event);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		iommu_pmu_start(event, 0);

	return 0;
}

static void iommu_pmu_del(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	int idx = event->hw.idx;

	iommu_pmu_stop(event, PERF_EF_UPDATE);

	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);

	iommu_pmu->event_list[idx] = NULL;
	event->hw.idx = -1;
	clear_bit(idx, iommu_pmu->used_mask);

	perf_event_update_userpage(event);
}

static void iommu_pmu_enable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
}

static void iommu_pmu_disable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
}

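/*
 * Note (illustrative): the perf core typically brackets counter
 * scheduling (add/del) with pmu_disable()/pmu_enable() callbacks, so
 * the FREEZE/UNFREEZE enhanced commands above stop and restart all
 * counters of this IOMMU while individual counters are reprogrammed.
 */
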
static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
{
	struct perf_event *event;
	u64 status;
	int i;

	/*
	 * Two counters may overflow very close together. Always check
	 * whether there are more to handle.
	 */
	while ((status = dmar_readq(iommu_pmu->overflow))) {
		for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
			/*
			 * Find the assigned event of the counter.
			 * Accumulate the value into the event->count.
			 */
			event = iommu_pmu->event_list[i];
			if (!event) {
				pr_warn_once("Cannot find the assigned event for counter %d\n", i);
				continue;
			}
			iommu_pmu_event_update(event);
		}

		dmar_writeq(iommu_pmu->overflow, status);
	}
}

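/*
 * Note on the loop above (illustrative): writing the handled bits back
 * to the overflow register is expected to clear them (write-1-to-clear),
 * so a counter that overflows while the handler runs is picked up by
 * the next iteration of the while loop.
 */
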
static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;

	if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
		return IRQ_NONE;

	iommu_pmu_counter_overflow(iommu->pmu);

	/* Clear the status bit */
	dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);

	return IRQ_HANDLED;
}

static int __iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	iommu_pmu->pmu.name		= iommu->name;
	iommu_pmu->pmu.task_ctx_nr	= perf_invalid_context;
	iommu_pmu->pmu.event_init	= iommu_pmu_event_init;
	iommu_pmu->pmu.pmu_enable	= iommu_pmu_enable;
	iommu_pmu->pmu.pmu_disable	= iommu_pmu_disable;
	iommu_pmu->pmu.add		= iommu_pmu_add;
	iommu_pmu->pmu.del		= iommu_pmu_del;
	iommu_pmu->pmu.start		= iommu_pmu_start;
	iommu_pmu->pmu.stop		= iommu_pmu_stop;
	iommu_pmu->pmu.read		= iommu_pmu_event_update;
	iommu_pmu->pmu.attr_groups	= iommu_pmu_attr_groups;
	iommu_pmu->pmu.attr_update	= iommu_pmu_attr_update;
	iommu_pmu->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
	iommu_pmu->pmu.module		= THIS_MODULE;

	return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
}

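/*
 * Illustrative note: perf_pmu_register() exposes this PMU under the
 * IOMMU's name (e.g. "dmar0"), so the format/, events/ and cpumask
 * attributes defined above appear under
 * /sys/bus/event_source/devices/<name>/ for the perf tool to consume.
 */
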
static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
	u32 off = dmar_readl(iommu->reg + offset);

	return iommu->reg + off;
}

int alloc_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu;
	int i, j, ret;
	u64 perfcap;
	u32 cap;

	if (!ecap_pms(iommu->ecap))
		return 0;

	/* The IOMMU PMU requires the ECMD support as well */
	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
	/* Performance monitoring is not supported. */
	if (!perfcap)
		return -ENODEV;

	/* Sanity check for the number of the counters and event groups */
	if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
		return -ENODEV;

	/* The interrupt on overflow is required */
	if (!pcap_interrupt(perfcap))
		return -ENODEV;

	/* Check required Enhanced Command Capability */
	if (!ecmd_has_pmu_essential(iommu))
		return -ENODEV;

	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
	if (!iommu_pmu)
		return -ENOMEM;

	iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
	if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
		pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
			     iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
		iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
	}

	iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
	iommu_pmu->filter = pcap_filters_mask(perfcap);
	iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
	iommu_pmu->num_eg = pcap_num_event_group(perfcap);

	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
	if (!iommu_pmu->evcap) {
		ret = -ENOMEM;
		goto free_pmu;
	}

	/* Parse event group capabilities */
	for (i = 0; i < iommu_pmu->num_eg; i++) {
		u64 pcap;

		pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
				  i * IOMMU_PMU_CAP_REGS_STEP);
		iommu_pmu->evcap[i] = pecap_es(pcap);
	}

	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
	if (!iommu_pmu->cntr_evcap) {
		ret = -ENOMEM;
		goto free_pmu_evcap;
	}
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
		if (!iommu_pmu->cntr_evcap[i]) {
			ret = -ENOMEM;
			goto free_pmu_cntr_evcap;
		}
		/*
		 * Set to the global capabilities; adjust according
		 * to the per-counter capabilities later.
		 */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
	}

	iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
	iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
	iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);

	/*
	 * Check per-counter capabilities. All counters should have the
	 * same capabilities on Interrupt on Overflow Support and Counter
	 * Width.
	 */
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		cap = dmar_readl(iommu_pmu->cfg_reg +
				 i * IOMMU_PMU_CFG_OFFSET +
				 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
		if (!iommu_cntrcap_pcc(cap))
			continue;

		/*
		 * It's possible that some counters have a different
		 * capability because of, e.g., a HW bug. Check the corner
		 * case here and simply drop those counters.
		 */
		if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
		    !iommu_cntrcap_ios(cap)) {
			iommu_pmu->num_cntr = i;
			pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
				iommu_pmu->num_cntr);
		}

		/* Clear the pre-defined events group */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = 0;

		/* Override with per-counter event capabilities */
		for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
			cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
					 IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
					 (j * IOMMU_PMU_OFF_REGS_STEP));
			iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
			/*
			 * Some events may only be supported by a specific counter.
			 * Track them in the evcap as well.
			 */
			iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
		}
	}

	iommu_pmu->iommu = iommu;
	iommu->pmu = iommu_pmu;

	return 0;

free_pmu_cntr_evcap:
	for (i = 0; i < iommu_pmu->num_cntr; i++)
		kfree(iommu_pmu->cntr_evcap[i]);
	kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
	kfree(iommu_pmu->evcap);
free_pmu:
	kfree(iommu_pmu);

	return ret;
}

void free_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (iommu_pmu->evcap) {
		int i;

		for (i = 0; i < iommu_pmu->num_cntr; i++)
			kfree(iommu_pmu->cntr_evcap[i]);
		kfree(iommu_pmu->cntr_evcap);
	}
	kfree(iommu_pmu->evcap);
	kfree(iommu_pmu);
	iommu->pmu = NULL;
}

static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;
	int irq, ret;

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0)
		return -EINVAL;

	snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);

	iommu->perf_irq = irq;
	ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
				   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
	if (ret) {
		dmar_free_hwirq(irq);
		iommu->perf_irq = 0;
		return ret;
	}

	return 0;
}

static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
{
	if (!iommu->perf_irq)
		return;

	free_irq(iommu->perf_irq, iommu);
	dmar_free_hwirq(iommu->perf_irq);
	iommu->perf_irq = 0;
}

static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);

	if (cpumask_empty(&iommu_pmu_cpu_mask))
		cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);

	if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
		iommu_pmu->cpu = cpu;

	return 0;
}

static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
	int target = cpumask_first(&iommu_pmu_cpu_mask);

	/*
	 * The iommu_pmu_cpu_mask has already been updated when offlining
	 * the CPU for the first iommu_pmu. Migrate the other iommu_pmu
	 * instances to the new target.
	 */
	if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
		perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
		iommu_pmu->cpu = target;
		return 0;
	}

	if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);

	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
	else
		return 0;

	perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
	iommu_pmu->cpu = target;

	return 0;
}

static int nr_iommu_pmu;
static enum cpuhp_state iommu_cpuhp_slot;

static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
{
	int ret;

	if (!nr_iommu_pmu) {
		ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
					      "driver/iommu/intel/perfmon:online",
					      iommu_pmu_cpu_online,
					      iommu_pmu_cpu_offline);
		if (ret < 0)
			return ret;
		iommu_cpuhp_slot = ret;
	}

	ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
	if (ret) {
		if (!nr_iommu_pmu)
			cpuhp_remove_multi_state(iommu_cpuhp_slot);
		return ret;
	}
	nr_iommu_pmu++;

	return 0;
}

static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
{
	cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);

	if (--nr_iommu_pmu)
		return;

	cpuhp_remove_multi_state(iommu_cpuhp_slot);
}

void iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (__iommu_pmu_register(iommu))
		goto err;

	if (iommu_pmu_cpuhp_setup(iommu_pmu))
		goto unregister;

	/* Set interrupt for overflow */
	if (iommu_pmu_set_interrupt(iommu))
		goto cpuhp_free;

	return;

cpuhp_free:
	iommu_pmu_cpuhp_free(iommu_pmu);
unregister:
	perf_pmu_unregister(&iommu_pmu->pmu);
err:
	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
	free_iommu_pmu(iommu);
}

void iommu_pmu_unregister(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	iommu_pmu_unset_interrupt(iommu);
	iommu_pmu_cpuhp_free(iommu_pmu);
	perf_pmu_unregister(&iommu_pmu->pmu);
}