// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Performance counter support for POWER6 processors.
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 */
7 #include <linux/kernel.h>
8 #include <linux/perf_event.h>
9 #include <linux/string.h>
11 #include <asm/cputable.h>
/*
 * Bits in event code for POWER6
 *
 * An event code packs several fields; each *_SH is the bit position of a
 * field, each *_MSK its width mask before shifting, and each *_MSKS the
 * mask already shifted into place.
 */
#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0x7
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH	16	/* Unit event comes from (TTMxSEL encoding) */
#define PM_UNIT_MSK	0xf
#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV		0x8000	/* Load lookahead match value */
#define PM_LLA		0x4000	/* Load lookahead match enable */
#define PM_BYTE_SH	12	/* Byte of event bus to use */
#define PM_BYTE_MSK	3	/* event bus bytes are numbered 0-3 */
#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK	7
#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
/* Union of the unit, byte and subunit fields: nonzero => uses the event bus */
#define PM_BUSEVENT_MSK	0xf3700
/*
 * Bits in MMCR1 for POWER6
 *
 * The (n) argument of MMCR1_TTMSEL_SH()/MMCR1_TTMSEL() is the event bus
 * byte; the (n) argument of MMCR1_PMCSEL_SH() is the 0-based counter index
 * (0 selects the PMC1 field).  The MMCR1_PMC1_* bits are the PMC1 instances
 * of per-counter bits; users shift them right by the 0-based counter index
 * to reach the bit for another counter.
 */
#define MMCR1_TTM0SEL_SH	60
#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK	0xf
#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH	45
#define MMCR1_NESTSEL_MSK	0x7
#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
#define MMCR1_PMC1_LLA		(1ul << 44)
#define MMCR1_PMC1_LLA_VALUE	(1ul << 39)
#define MMCR1_PMC1_ADDR_SEL	(1ul << 35)
#define MMCR1_PMC1SEL_SH	24
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0xff
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value >> 1.
 * Bottom 4 bits are a map of which PMCs are interesting,
 * top 4 bits say what sort of event:
 *   0 = direct marked event,
 *   1 = byte decode event,
 *   4 = add/and event (PMC1 -> bits 0 & 4),
 *   5 = add/and event (PMC1 -> bits 1 & 5),
 *   6 = add/and event (PMC1 -> bits 2 & 6),
 *   7 = add/and event (PMC1 -> bits 3 & 7).
 */
/*
 * NOTE(review): this copy of direct_event_is_marked[] is incomplete.  The
 * array is declared with 0x60 >> 1 (48) elements, but only 13 initializers
 * are present below; each line still carries a stray listing number, the
 * numbers skip values, and the closing brace is absent.  The table is
 * looked up by position (PMCSEL >> 1), so the absent rows shift every later
 * entry and cannot be inferred from the surviving ones.  Restore the table
 * from a pristine copy of the file before building.
 */
63 static unsigned char direct_event_is_marked[0x60 >> 1] = {
67 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
68 0x04, /* 08 PM_MRK_DFU_FIN */
69 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
72 0x02, /* 10 PM_MRK_INST_DISP */
73 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
76 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
77 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
78 0x01, /* 1c PM_MRK_INST_ISSUED */
84 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
95 0x08, /* 3e PM_MRK_INST_TIMEO */
105 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
106 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
107 0x02, /* 56 PM_MRK_LD_MISS_L1 */
/*
 * Masks showing for each unit which bits are marked events.
 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
 */
/*
 * Per-unit bit masks, indexed by the unit number taken from the event code
 * (see the marked_bus_events[unit] lookup in power6_marked_instr_event()).
 *
 * NOTE(review): this copy is incomplete.  The array is declared with 16
 * elements but only 12 initializers are present below, the listing numbers
 * at the start of each line skip values between the later entries, and the
 * closing brace is absent.  The position of each entry matters, so restore
 * the table from a pristine copy rather than padding it by hand.
 */
118 static u32 marked_bus_events[16] = {
119 0x01000000, /* direct events set 1: byte 3 bit 0 */
120 0x00010000, /* direct events set 2: byte 2 bit 0 */
121 0, 0, 0, 0, /* IDU, IFU, nest: nothing */
122 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
123 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
124 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
125 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
127 0x00000010, /* VMX set 3: byte 0 bit 4 */
129 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 */
/*
 * Decide whether an event counts marked instructions.  From the lines
 * present here: the PMC number and the PMCSEL value (with the edge/level
 * bit dropped) are extracted from the event code; PMCSEL values that fall
 * inside direct_event_is_marked[] are checked against that table for the
 * chosen PMC; and for events that use the event bus the result is a single
 * bit of marked_bus_events[unit], selected by the bus byte and a bit index.
 *
 * NOTE(review): this copy is truncated.  The listing numbers at the start
 * of each line skip values; `bit`, `byte`, `unit` and `mask` are used below
 * without a visible declaration; and the statements controlled by several
 * of the `if` / `else if` lines are absent.  Do not rebuild the control
 * flow by guesswork -- restore it from a pristine copy of the file.
 */
137 static int power6_marked_instr_event(u64 event)
139 int pmc, psel, ptype;
143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
144 psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
149 if (psel < sizeof(direct_event_is_marked)) {
150 ptype = direct_event_is_marked[psel];
151 if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
159 bit = ptype ^ (pmc - 1);
160 } else if ((psel & 0x48) == 0x40)
163 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
167 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
168 mask = marked_bus_events[unit];
169 return (mask >> (byte * 8 + bit)) & 1;
/*
 * Assign PMC numbers and compute MMCR1 value for a set of events
 */
/*
 * Place a set of events on hardware counters and build the MMCR values.
 * In the lines present here the function:
 *   - returns -1 when two events name the same fixed PMC ("collision"),
 *   - searches the first four counters for a free one when an event can go
 *     on any PMC,
 *   - for events that use the event bus, ORs the unit into the TTMxSEL
 *     field for the event's bus byte and the subunit into NESTSEL, after
 *     comparing against any selection already made,
 *   - ORs in the per-counter ADDR_SEL / LLA / LLA_VALUE bits, shifted by
 *     the counter index,
 *   - sets MMCRA_SAMPLE_ENABLE when power6_marked_instr_event() is nonzero,
 *   - ORs each event's PMCxSEL value into MMCR1,
 *   - writes MMCR0_PMC1CE / MMCR0_PMCjCE into mmcr->mmcr0.
 *
 * NOTE(review): this copy is truncated.  The listing numbers at the start
 * of each line skip values.  Not visible anywhere below: a declaration of
 * `i`, the braces, the statements controlled by most of the `if` lines
 * (including whatever happens on a TTMSEL/NESTSEL conflict), any store to
 * hwc[], any update of `ttmset`, any store of mmcr1/mmcra into *mmcr, and
 * the final return.  The function programs hardware registers, so restore
 * it from a pristine copy instead of filling the gaps by inference.
 */
175 static int p6_compute_mmcr(u64 event[], int n_ev,
176 unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
177 u32 flags __maybe_unused)
179 unsigned long mmcr1 = 0;
180 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
182 unsigned int pmc, ev, b, u, s, psel;
183 unsigned int ttmset = 0;
184 unsigned int pmc_inuse = 0;
188 for (i = 0; i < n_ev; ++i) {
189 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
191 if (pmc_inuse & (1 << (pmc - 1)))
192 return -1; /* collision! */
193 pmc_inuse |= 1 << (pmc - 1);
196 for (i = 0; i < n_ev; ++i) {
198 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
202 /* can go on any PMC; find a free one */
203 for (pmc = 0; pmc < 4; ++pmc)
204 if (!(pmc_inuse & (1 << pmc)))
208 pmc_inuse |= 1 << pmc;
211 psel = ev & PM_PMCSEL_MSK;
212 if (ev & PM_BUSEVENT_MSK) {
213 /* this event uses the event bus */
214 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
215 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
216 /* check for conflict on this byte of event bus */
217 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
219 mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
222 /* Nest events have a further mux */
223 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
224 if ((ttmset & 0x10) &&
225 MMCR1_NESTSEL(mmcr1) != s)
228 mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
230 if (0x30 <= psel && psel <= 0x3d) {
231 /* these need the PMCx_ADDR_SEL bits */
233 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
235 /* bus select values are different for PMC3/4 */
236 if (pmc >= 2 && (psel & 0x90) == 0x80)
240 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
242 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
244 if (power6_marked_instr_event(event[i]))
245 mmcra |= MMCRA_SAMPLE_ENABLE;
247 mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
251 mmcr->mmcr0 = MMCR0_PMC1CE;
253 mmcr->mmcr0 |= MMCR0_PMCjCE;
/*
 * Layout of constraint bits:
 *
 *	0-1	add field: number of uses of PMC1 (max 1)
 *	2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
 *	12-15	add field: number of uses of PMC1-4 (max 4)
 *	16-19	select field: unit on byte 0 of event bus
 *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
 *	32-34	select field: nest (subunit) event selector
 */
/*
 * Build the constraint mask/value pair for one event.  From the lines
 * present here: the PMC number is extracted; events naming a PMC above 4
 * are singled out unless the code is exactly 0x500009 or 0x600005; for
 * events that use the event bus a 4-bit select field holding the unit is
 * placed at bit 16 + 4 * byte, and when the unit is 5 the subunit is placed
 * in a select field at bit 32; and 0x8000 is ORed into the mask for the
 * PMC1-4 use count.
 *
 * NOTE(review): this copy is truncated.  The listing numbers at the start
 * of each line skip values.  Not visible below: the braces, the statement
 * controlled by the `pmc > 4` test, the per-PMC add-field setup, the
 * condition (if any) guarding the 0x8000 line, any store through maskp or
 * valp, and every return.  Restore from a pristine copy of the file.
 */
269 static int p6_get_constraint(u64 event, unsigned long *maskp,
270 unsigned long *valp, u64 event_config1 __maybe_unused)
272 int pmc, byte, sh, subunit;
273 unsigned long mask = 0, value = 0;
275 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
277 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
283 if (event & PM_BUSEVENT_MSK) {
284 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
285 sh = byte * 4 + (16 - PM_UNIT_SH);
286 mask |= PM_UNIT_MSKS << sh;
287 value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
288 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
289 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
290 mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
291 value |= (unsigned long)subunit << 32;
295 mask |= 0x8000; /* add field for count of PMC1-4 uses */
303 static int p6_limited_pmc_event(u64 event)
305 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
307 return pmc == 5 || pmc == 6;
#define MAX_ALT	4	/* at most 4 alternatives for any event */

/*
 * Each row lists event codes that count the same thing on different
 * counters.  Rows shorter than MAX_ALT are zero-filled by the compiler, so
 * a 0 entry marks the end of a row.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
	{ 0x10000e, 0x400010 },			/* PM_PURR */
	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
};
338 static int find_alternatives_list(u64 event)
340 const unsigned int presorted_event_table[] = {
341 0x0130e8, 0x080080, 0x080088, 0x10000a, 0x10000b, 0x10000d, 0x10000e,
342 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 0x1000f8,
343 0x1000fc, 0x200008, 0x20000e, 0x200010, 0x200012, 0x200054, 0x2000f0,
344 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 0x2000fe,
345 0x2d0030, 0x30000a, 0x30000c, 0x300010, 0x300012, 0x30001a, 0x300056,
346 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 0x400006,
347 0x400007, 0x40000a, 0x40000e, 0x400010, 0x400018, 0x400056, 0x4000f0,
350 const unsigned int event_index_table[] = {
351 0, 1, 2, 3, 4, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 14,
352 7, 15, 2, 9, 16, 3, 4, 0, 17, 10, 18, 19, 20, 1, 17, 15, 19,
353 18, 2, 16, 21, 8, 0, 22, 13, 14, 11, 21, 5, 20, 22, 1, 6, 3
355 int hi = ARRAY_SIZE(presorted_event_table) - 1;
359 int mid = lo + (hi - lo) / 2;
360 unsigned int alt = presorted_event_table[mid];
364 else if (alt > event)
367 return event_index_table[mid];
/*
 * Fill alt[] with event codes equivalent to `event`.  Steps visible in the
 * lines present here:
 *   - look the event up with find_alternatives_list() and copy entries
 *     from that row of event_alternatives[], counting how many are
 *     limited-PMC events,
 *   - otherwise add the PMCSEL 0x32 <-> 0x34 and 0x38 <-> 0x3a counter
 *     swaps for events that name a PMC,
 *   - when PPMU_ONLY_COUNT_RUN is set, add the RUN-state equivalents
 *     listed in the case labels,
 *   - filter limited-PMC events in or out according to
 *     PPMU_LIMITED_PMC_OK / PPMU_LIMITED_PMC_REQD.
 *
 * NOTE(review): this copy is truncated.  The listing numbers at the start
 * of each line skip values.  Not visible below: declarations of `i`, `j`,
 * `nlim` and `aevent`; any store to alt[0] (nalt starts at 1); the check on
 * the value returned by find_alternatives_list(); the conditions inside
 * the copy loop; the `switch` line that the case labels belong to and its
 * `break`s; the bodies of the two filter loops; the braces; and the return.
 * Restore from a pristine copy of the file.
 */
373 static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
376 unsigned int psel, pmc;
377 unsigned int nalt = 1;
381 nlim = p6_limited_pmc_event(event);
383 /* check the alternatives table */
384 i = find_alternatives_list(event);
386 /* copy out alternatives from list */
387 for (j = 0; j < MAX_ALT; ++j) {
388 aevent = event_alternatives[i][j];
392 alt[nalt++] = aevent;
393 nlim += p6_limited_pmc_event(aevent);
397 /* Check for alternative ways of computing sum events */
398 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
399 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
400 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
401 if (pmc && (psel == 0x32 || psel == 0x34))
402 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
403 ((5 - pmc) << PM_PMC_SH);
405 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
406 if (pmc && (psel == 0x38 || psel == 0x3a))
407 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
408 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
411 if (flags & PPMU_ONLY_COUNT_RUN) {
413 * We're only counting in RUN state,
414 * so PM_CYC is equivalent to PM_RUN_CYC,
415 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
416 * This doesn't include alternatives that don't provide
417 * any extra flexibility in assigning PMCs (e.g.
418 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
419 * Note that even with these additional alternatives
420 * we never end up with more than 4 alternatives for any event.
423 for (i = 0; i < nalt; ++i) {
425 case 0x1e: /* PM_CYC */
426 alt[j++] = 0x600005; /* PM_RUN_CYC */
429 case 0x10000a: /* PM_RUN_CYC */
430 alt[j++] = 0x1e; /* PM_CYC */
432 case 2: /* PM_INST_CMPL */
433 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
436 case 0x500009: /* PM_RUN_INST_CMPL */
437 alt[j++] = 2; /* PM_INST_CMPL */
439 case 0x10000e: /* PM_PURR */
440 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
442 case 0x4000f4: /* PM_RUN_PURR */
443 alt[j++] = 0x10000e; /* PM_PURR */
450 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
451 /* remove the limited PMC events */
453 for (i = 0; i < nalt; ++i) {
454 if (!p6_limited_pmc_event(alt[i])) {
460 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
461 /* remove all but the limited PMC events */
463 for (i = 0; i < nalt; ++i) {
464 if (p6_limited_pmc_event(alt[i])) {
475 static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
477 /* Set PMCxSEL to 0 to disable PMCx */
479 mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
482 static int power6_generic_events[] = {
483 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
484 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
485 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
486 [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
487 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
488 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
491 #define C(x) PERF_COUNT_HW_CACHE_##x
494 * Table of generalized cache-related events.
495 * 0 means not supported, -1 means nonsensical, other values
497 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
499 static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
500 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
501 [C(OP_READ)] = { 0x280030, 0x80080 },
502 [C(OP_WRITE)] = { 0x180032, 0x80088 },
503 [C(OP_PREFETCH)] = { 0x810a4, 0 },
505 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
506 [C(OP_READ)] = { 0, 0x100056 },
507 [C(OP_WRITE)] = { -1, -1 },
508 [C(OP_PREFETCH)] = { 0x4008c, 0 },
510 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
511 [C(OP_READ)] = { 0x150730, 0x250532 },
512 [C(OP_WRITE)] = { 0x250432, 0x150432 },
513 [C(OP_PREFETCH)] = { 0x810a6, 0 },
515 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
516 [C(OP_READ)] = { 0, 0x20000e },
517 [C(OP_WRITE)] = { -1, -1 },
518 [C(OP_PREFETCH)] = { -1, -1 },
520 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
521 [C(OP_READ)] = { 0, 0x420ce },
522 [C(OP_WRITE)] = { -1, -1 },
523 [C(OP_PREFETCH)] = { -1, -1 },
525 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
526 [C(OP_READ)] = { 0x430e6, 0x400052 },
527 [C(OP_WRITE)] = { -1, -1 },
528 [C(OP_PREFETCH)] = { -1, -1 },
530 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
531 [C(OP_READ)] = { -1, -1 },
532 [C(OP_WRITE)] = { -1, -1 },
533 [C(OP_PREFETCH)] = { -1, -1 },
/*
 * Descriptor handed to register_power_pmu() by init_power6_pmu(); it wires
 * the callbacks and tables defined in this file into the powerpc perf core.
 *
 * NOTE(review): this copy is incomplete.  The listing numbers at the start
 * of each line jump from 537 to 540, so two initializer lines are absent
 * and their contents are not recoverable from this file; the closing brace
 * is absent as well.  Restore from a pristine copy of the file.
 */
537 static struct power_pmu power6_pmu = {
540 .max_alternatives = MAX_ALT,
541 .add_fields = 0x1555,
542 .test_adder = 0x3000,
543 .compute_mmcr = p6_compute_mmcr,
544 .get_constraint = p6_get_constraint,
545 .get_alternatives = p6_get_alternatives,
546 .disable_pmc = p6_disable_pmc,
547 .limited_pmc_event = p6_limited_pmc_event,
548 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
549 .n_generic = ARRAY_SIZE(power6_generic_events),
550 .generic_events = power6_generic_events,
551 .cache_events = &power6_cache_events,
/*
 * Init-time entry point: reads the processor version register, compares
 * its version field with PVR_POWER6, and registers power6_pmu through
 * register_power_pmu(), returning that call's result.
 *
 * NOTE(review): this copy is truncated.  The statement controlled by the
 * PVR check (listing line 559) and the braces are absent.  Read literally,
 * the `if` below would govern the register call, i.e. the PMU would be
 * registered only on processors that are NOT POWER6.  Restore the early
 * exit from a pristine copy of the file before building.
 */
554 int __init init_power6_pmu(void)
556 unsigned int pvr = mfspr(SPRN_PVR);
558 if (PVR_VER(pvr) != PVR_POWER6)
561 return register_power_pmu(&power6_pmu);