1 // SPDX-License-Identifier: GPL-2.0
3 * Performance event support - Processor Activity Instrumentation Extension
6 * Copyright IBM Corp. 2022
7 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
9 #define KMSG_COMPONENT "pai_ext"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
12 #include <linux/kernel.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/percpu.h>
15 #include <linux/notifier.h>
16 #include <linux/init.h>
17 #include <linux/export.h>
20 #include <asm/cpu_mcf.h>
21 #include <asm/ctl_reg.h>
23 #include <asm/debug.h>
25 #define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
26 #define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
28 static debug_info_t *paiext_dbg;
29 static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
42 /* Create the PAI extension 1 control block area.
43 * The PAI extension control block 1 is pointed to by lowcore
44 * address 0x1508 for each CPU. This control block is 512 bytes in size
45 * and requires a 512 byte boundary alignment.
47 struct paiext_cb { /* PAI extension 1 control block */
48 u64 header; /* Not used */
50 u64 acc; /* Addr to analytics counter control block */
55 unsigned long *area; /* Area for CPU to store counters */
56 struct pai_userdata *save; /* Area to store non-zero counters */
57 enum paiext_mode mode; /* Type of event */
58 unsigned int active_events; /* # of PAI Extension users */
60 struct perf_event *event; /* Perf event for sampling */
61 struct paiext_cb *paiext_cb; /* PAI extension control block area */
64 struct paiext_mapptr {
65 struct paiext_map *mapptr;
68 static struct paiext_root { /* Anchor to per CPU data */
69 int refcnt; /* Overall active events */
70 struct paiext_mapptr __percpu *mapptr;
73 /* Free per CPU data when the last event is removed. */
74 static void paiext_root_free(void)
76 if (!--paiext_root.refcnt) {
77 free_percpu(paiext_root.mapptr);
78 paiext_root.mapptr = NULL;
82 /* On initialization of first event also allocate per CPU data dynamically.
83 * Start with an array of pointers, the array size is the maximum number of
84 * CPUs possible, which might be larger than the number of CPUs currently
87 static int paiext_root_alloc(void)
89 if (++paiext_root.refcnt == 1) {
90 /* The memory is already zeroed. */
91 paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
92 if (!paiext_root.mapptr) {
93 /* Returning without refcnt adjustment is ok. The
94 * error code is handled by paiext_alloc() which
95 * decrements refcnt when an event can not be
104 /* Protects the sampler and counter members against concurrent
105 * increments and prohibits concurrent execution of
106 * counting and sampling events.
107 * Ensures that analytics counter block is deallocated only when the
108 * sampling and counting on that cpu is zero.
109 * For details see paiext_alloc().
111 static DEFINE_MUTEX(paiext_reserve_mutex);
113 /* Free all memory allocated for event counting/sampling setup */
114 static void paiext_free(struct paiext_mapptr *mp)
116 kfree(mp->mapptr->area);
117 kfree(mp->mapptr->paiext_cb);
118 kvfree(mp->mapptr->save);
123 /* Release the PMU if event is the last perf event */
124 static void paiext_event_destroy(struct perf_event *event)
126 struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
127 struct paiext_map *cpump = mp->mapptr;
129 mutex_lock(&paiext_reserve_mutex);
131 if (!--cpump->refcnt) /* Last reference gone */
134 mutex_unlock(&paiext_reserve_mutex);
135 debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
136 event->cpu, mp->mapptr);
140 /* Used to avoid races in checking concurrent access of counting and
141 * sampling for pai_extension events.
143 * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
144 * allowed and when this event is running, no counting event is allowed.
145 * Several counting events are allowed in parallel, but no sampling event
146 * is allowed while one (or more) counting events are running.
148 * This function is called in process context and it is safe to block.
149 * When the event initialization functions fails, no other call back will
152 * Allocate the memory for the event.
154 static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
156 struct paiext_mapptr *mp;
157 struct paiext_map *cpump;
160 mutex_lock(&paiext_reserve_mutex);
162 rc = paiext_root_alloc();
166 mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
168 if (!cpump) { /* Paiext_map allocated? */
170 cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
174 /* Allocate memory for counter area and counter extraction.
176 * - a 512 byte block and requires 512 byte boundary alignment.
177 * - a 1KB block and requires 1KB boundary alignment.
178 * Only the first counting event has to allocate the area.
180 * Note: This works with commit 59bb47985c1d by default.
181 * Backporting this to kernels without this commit might
185 cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
186 cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
187 cpump->save = kvmalloc_array(paiext_cnt + 1,
188 sizeof(struct pai_userdata),
190 if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
194 cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
197 /* Multiple invocation, check what's active.
198 * Supported are multiple counter events or only one sampling
199 * event concurrently at any one time.
201 if (cpump->mode == PAI_MODE_SAMPLING ||
202 (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
209 cpump->event = event;
214 /* Error in allocation of event, decrement anchor. Since
215 * the event is not created, its destroy() function is never
216 * invoked. Adjust the reference counter for the anchor.
220 mutex_unlock(&paiext_reserve_mutex);
221 /* If rc is non-zero, no increment of counter/sampler was done. */
225 /* The PAI extension 1 control block supports up to 128 entries. Return
226 * the index within PAIE1_CB given the event number. Also validate event
229 static int paiext_event_valid(struct perf_event *event)
231 u64 cfg = event->attr.config;
233 if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
234 /* Offset NNPA in paiext_cb */
235 event->hw.config_base = offsetof(struct paiext_cb, acc);
241 /* Might be called on different CPU than the one the event is intended for. */
242 static int paiext_event_init(struct perf_event *event)
244 struct perf_event_attr *a = &event->attr;
247 /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
248 if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
250 /* PAI extension event must be valid and in supported range */
251 rc = paiext_event_valid(event);
254 /* Allow only CPU wide operation, no process context for now. */
255 if (event->hw.target || event->cpu == -1)
257 /* Allow only event NNPA_ALL for sampling. */
258 if (a->sample_period && a->config != PAI_NNPA_BASE)
260 /* Prohibit exclude_user event selection */
264 rc = paiext_alloc(a, event);
267 event->hw.last_tag = 0;
268 event->destroy = paiext_event_destroy;
270 if (a->sample_period) {
271 a->sample_period = 1;
273 /* Register for paiext_sched_task() to be called */
274 event->attach_state |= PERF_ATTACH_SCHED_CB;
275 /* Add raw data which are the memory mapped counters */
276 a->sample_type |= PERF_SAMPLE_RAW;
277 /* Turn off inheritance */
284 static u64 paiext_getctr(struct paiext_map *cpump, int nr)
286 return cpump->area[nr];
289 /* Read the counter values. Return value from location in buffer. For event
290 * NNPA_ALL sum up all events.
292 static u64 paiext_getdata(struct perf_event *event)
294 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
295 struct paiext_map *cpump = mp->mapptr;
299 if (event->attr.config != PAI_NNPA_BASE)
300 return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
302 for (i = 1; i <= paiext_cnt; i++)
303 sum += paiext_getctr(cpump, i);
308 static u64 paiext_getall(struct perf_event *event)
310 return paiext_getdata(event);
313 static void paiext_read(struct perf_event *event)
315 u64 prev, new, delta;
317 prev = local64_read(&event->hw.prev_count);
318 new = paiext_getall(event);
319 local64_set(&event->hw.prev_count, new);
321 local64_add(delta, &event->count);
324 static void paiext_start(struct perf_event *event, int flags)
328 if (event->hw.last_tag)
330 event->hw.last_tag = 1;
331 sum = paiext_getall(event); /* Get current value */
332 local64_set(&event->hw.prev_count, sum);
333 local64_set(&event->count, 0);
336 static int paiext_add(struct perf_event *event, int flags)
338 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
339 struct paiext_map *cpump = mp->mapptr;
340 struct paiext_cb *pcb = cpump->paiext_cb;
342 if (++cpump->active_events == 1) {
343 S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
344 pcb->acc = virt_to_phys(cpump->area) | 0x1;
345 /* Enable CPU instruction lookup for PAIE1 control block */
346 __ctl_set_bit(0, 49);
347 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
348 __func__, S390_lowcore.aicd, pcb->acc);
350 if (flags & PERF_EF_START && !event->attr.sample_period) {
351 /* Only counting needs initial counter value */
352 paiext_start(event, PERF_EF_RELOAD);
355 if (event->attr.sample_period) {
356 cpump->event = event;
357 perf_sched_cb_inc(event->pmu);
362 static void paiext_stop(struct perf_event *event, int flags)
365 event->hw.state = PERF_HES_STOPPED;
368 static void paiext_del(struct perf_event *event, int flags)
370 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
371 struct paiext_map *cpump = mp->mapptr;
372 struct paiext_cb *pcb = cpump->paiext_cb;
374 if (event->attr.sample_period)
375 perf_sched_cb_dec(event->pmu);
376 if (!event->attr.sample_period) {
377 /* Only counting needs to read counter */
378 paiext_stop(event, PERF_EF_UPDATE);
380 if (--cpump->active_events == 0) {
381 /* Disable CPU instruction lookup for PAIE1 control block */
382 __ctl_clear_bit(0, 49);
384 S390_lowcore.aicd = 0;
385 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
386 __func__, S390_lowcore.aicd, pcb->acc);
390 /* Create raw data and save it in buffer. Returns number of bytes copied.
391 * Saves only positive counter entries of the form
392 * 2 bytes: Number of counter
393 * 8 bytes: Value of counter
395 static size_t paiext_copy(struct paiext_map *cpump)
397 struct pai_userdata *userdata = cpump->save;
400 for (i = 1; i <= paiext_cnt; i++) {
401 u64 val = paiext_getctr(cpump, i);
404 userdata[outidx].num = i;
405 userdata[outidx].value = val;
409 return outidx * sizeof(*userdata);
412 /* Write sample when one or more counters values are nonzero.
414 * Note: The function paiext_sched_task() and paiext_push_sample() are not
415 * invoked after function paiext_del() has been called because of function
416 * perf_sched_cb_dec().
417 * The function paiext_sched_task() and paiext_push_sample() are only
418 * called when sampling is active. Function perf_sched_cb_inc()
419 * has been invoked to install function paiext_sched_task() as call back
420 * to run at context switch time (see paiext_add()).
422 * This causes function perf_event_context_sched_out() and
423 * perf_event_context_sched_in() to check whether the PMU has installed an
424 * sched_task() callback. That callback is not active after paiext_del()
425 * returns and has deleted the event on that CPU.
427 static int paiext_push_sample(void)
429 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
430 struct paiext_map *cpump = mp->mapptr;
431 struct perf_event *event = cpump->event;
432 struct perf_sample_data data;
433 struct perf_raw_record raw;
438 rawsize = paiext_copy(cpump);
439 if (!rawsize) /* No incremented counters */
442 /* Setup perf sample */
443 memset(&regs, 0, sizeof(regs));
444 memset(&raw, 0, sizeof(raw));
445 memset(&data, 0, sizeof(data));
446 perf_sample_data_init(&data, 0, event->hw.last_period);
447 if (event->attr.sample_type & PERF_SAMPLE_TID) {
448 data.tid_entry.pid = task_tgid_nr(current);
449 data.tid_entry.tid = task_pid_nr(current);
451 if (event->attr.sample_type & PERF_SAMPLE_TIME)
452 data.time = event->clock();
453 if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
455 if (event->attr.sample_type & PERF_SAMPLE_CPU)
456 data.cpu_entry.cpu = smp_processor_id();
457 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
458 raw.frag.size = rawsize;
459 raw.frag.data = cpump->save;
460 raw.size = raw.frag.size;
462 data.sample_flags |= PERF_SAMPLE_RAW;
465 overflow = perf_event_overflow(event, &data, &regs);
466 perf_event_update_userpage(event);
467 /* Clear lowcore area after read */
468 memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
472 /* Called on schedule-in and schedule-out. No access to event structure,
473 * but for sampling only event NNPA_ALL is allowed.
475 static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
477 /* We started with a clean page on event installation. So read out
478 * results on schedule_out and if page was dirty, clear values.
481 paiext_push_sample();
484 /* Attribute definitions for pai extension1 interface. As with other CPU
485 * Measurement Facilities, there is one attribute per mapped counter.
486 * The number of mapped counters may vary per machine generation. Use
487 * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
488 * to determine the number of mapped counters. The instruction returns
489 * a positive number, which is the highest number of supported counters.
490 * All counters less than this number are also supported, there are no
491 * holes. A returned number of zero means no support for mapped counters.
493 * The identification of the counter is a unique number. The chosen range
494 * is 0x1800 + offset in mapped kernel page.
495 * All CPU Measurement Facility counters identifiers must be unique and
496 * the numbers from 0 to 496 are already used for the CPU Measurement
497 * Counter facility. Numbers 0x1000 to 0x103e are used for PAI cryptography
499 * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
500 * used for the CPU Measurement Sampling facility.
502 PMU_FORMAT_ATTR(event, "config:0-63");
504 static struct attribute *paiext_format_attr[] = {
505 &format_attr_event.attr,
509 static struct attribute_group paiext_events_group = {
511 .attrs = NULL, /* Filled in attr_event_init() */
514 static struct attribute_group paiext_format_group = {
516 .attrs = paiext_format_attr,
519 static const struct attribute_group *paiext_attr_groups[] = {
520 &paiext_events_group,
521 &paiext_format_group,
525 /* Performance monitoring unit for mapped counters */
526 static struct pmu paiext = {
527 .task_ctx_nr = perf_invalid_context,
528 .event_init = paiext_event_init,
531 .start = paiext_start,
534 .sched_task = paiext_sched_task,
535 .attr_groups = paiext_attr_groups,
538 /* List of symbolic PAI extension 1 NNPA counter names. */
539 static const char * const paiext_ctrnames[] = {
549 [9] = "NNPA_IBM_RESERVED_9",
552 [12] = "NNPA_SIGMOID",
553 [13] = "NNPA_SOFTMAX",
554 [14] = "NNPA_BATCHNORM",
555 [15] = "NNPA_MAXPOOL2D",
556 [16] = "NNPA_AVGPOOL2D",
557 [17] = "NNPA_LSTMACT",
558 [18] = "NNPA_GRUACT",
559 [19] = "NNPA_CONVOLUTION",
560 [20] = "NNPA_MATMUL_OP",
561 [21] = "NNPA_MATMUL_OP_BCAST23",
562 [22] = "NNPA_SMALLBATCH",
563 [23] = "NNPA_LARGEDIM",
564 [24] = "NNPA_SMALLTENSOR",
565 [25] = "NNPA_1MFRAME",
566 [26] = "NNPA_2GFRAME",
567 [27] = "NNPA_ACCESSEXCEPT",
570 static void __init attr_event_free(struct attribute **attrs, int num)
572 struct perf_pmu_events_attr *pa;
573 struct device_attribute *dap;
576 for (i = 0; i < num; i++) {
577 dap = container_of(attrs[i], struct device_attribute, attr);
578 pa = container_of(dap, struct perf_pmu_events_attr, attr);
584 static int __init attr_event_init_one(struct attribute **attrs, int num)
586 struct perf_pmu_events_attr *pa;
588 pa = kzalloc(sizeof(*pa), GFP_KERNEL);
592 sysfs_attr_init(&pa->attr.attr);
593 pa->id = PAI_NNPA_BASE + num;
594 pa->attr.attr.name = paiext_ctrnames[num];
595 pa->attr.attr.mode = 0444;
596 pa->attr.show = cpumf_events_sysfs_show;
597 pa->attr.store = NULL;
598 attrs[num] = &pa->attr.attr;
602 /* Create PMU sysfs event attributes on the fly. */
603 static int __init attr_event_init(void)
605 struct attribute **attrs;
608 attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
612 for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
613 ret = attr_event_init_one(attrs, i);
615 attr_event_free(attrs, i);
620 paiext_events_group.attrs = attrs;
624 static int __init paiext_init(void)
626 struct qpaci_info_block ib;
629 if (!test_facility(197))
633 paiext_cnt = ib.num_nnpa;
634 if (paiext_cnt >= PAI_NNPA_MAXCTR)
635 paiext_cnt = PAI_NNPA_MAXCTR;
639 rc = attr_event_init();
641 pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
645 /* Setup s390dbf facility */
646 paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
648 pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
652 debug_register_view(paiext_dbg, &debug_sprintf_view);
654 rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
656 pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
664 debug_unregister_view(paiext_dbg, &debug_sprintf_view);
665 debug_unregister(paiext_dbg);
667 attr_event_free(paiext_events_group.attrs,
668 ARRAY_SIZE(paiext_ctrnames) + 1);
672 device_initcall(paiext_init);